author     hyukjinkwon <gurwls223@gmail.com>          2016-11-01 12:46:41 -0700
committer  Michael Armbrust <michael@databricks.com>  2016-11-01 12:46:41 -0700
commit     01dd0083011741c2bbe5ae1d2a25f2c9a1302b76 (patch)
tree       7b9993165b1a4f48e64d566d93c7883a3096403d /sql/core/src/test
parent     cfac17ee1cec414663b957228e469869eb7673c1 (diff)
download   spark-01dd0083011741c2bbe5ae1d2a25f2c9a1302b76.tar.gz
           spark-01dd0083011741c2bbe5ae1d2a25f2c9a1302b76.tar.bz2
           spark-01dd0083011741c2bbe5ae1d2a25f2c9a1302b76.zip
[SPARK-17764][SQL] Add `to_json` function to convert a nested struct column to a JSON string
## What changes were proposed in this pull request?

This PR proposes to add a `to_json` function in Scala, Java and Python as the counterpart of `from_json`. It'd be useful to be able to convert the same column both from and to JSON. Also, some data sources do not support nested types; if we are forced to save a dataframe into one of those data sources, this function gives us a workaround. The usage is as below:

```scala
val df = Seq(Tuple1(Tuple1(1))).toDF("a")
df.select(to_json($"a").as("json")).show()
```

```
+--------+
|    json|
+--------+
|{"_1":1}|
+--------+
```

## How was this patch tested?

Unit tests in `JsonFunctionsSuite` and `JsonExpressionsSuite`.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #15354 from HyukjinKwon/SPARK-17764.
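For context, here is a minimal round-trip sketch of the `from_json`/`to_json` pairing described above. This is not part of the patch; it assumes a `SparkSession` in scope as `spark` (as in `spark-shell`), and the printed output comment is illustrative:

```scala
import org.apache.spark.sql.functions.{from_json, to_json}
import org.apache.spark.sql.types.{IntegerType, StructType}
import spark.implicits._

// Struct column -> JSON string -> struct column again.
val schema = new StructType().add("_1", IntegerType)
val df = Seq(Tuple1(Tuple1(1))).toDF("a")
df.select(from_json(to_json($"a"), schema).as("roundtrip")).show()
// +---------+
// |roundtrip|
// +---------+
// |      [1]|
// +---------+
```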
Diffstat (limited to 'sql/core/src/test')
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala | 30
1 file changed, 24 insertions, 6 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 518d6e92b2..59ae889cf3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -17,9 +17,9 @@
package org.apache.spark.sql
-import org.apache.spark.sql.functions.from_json
+import org.apache.spark.sql.functions.{from_json, struct, to_json}
import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{IntegerType, StructType}
+import org.apache.spark.sql.types.{CalendarIntervalType, IntegerType, StructType}
class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
import testImplicits._
@@ -31,7 +31,6 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
Row("alice", "5"))
}
-
val tuples: Seq[(String, String)] =
("1", """{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}""") ::
("2", """{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}""") ::
@@ -97,7 +96,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
checkAnswer(expr, expected)
}
- test("json_parser") {
+ test("from_json") {
val df = Seq("""{"a": 1}""").toDS()
val schema = new StructType().add("a", IntegerType)
@@ -106,7 +105,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
Row(Row(1)) :: Nil)
}
- test("json_parser missing columns") {
+ test("from_json missing columns") {
val df = Seq("""{"a": 1}""").toDS()
val schema = new StructType().add("b", IntegerType)
@@ -115,7 +114,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
Row(Row(null)) :: Nil)
}
- test("json_parser invalid json") {
+ test("from_json invalid json") {
val df = Seq("""{"a" 1}""").toDS()
val schema = new StructType().add("a", IntegerType)
@@ -123,4 +122,23 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
df.select(from_json($"value", schema)),
Row(null) :: Nil)
}
+
+ test("to_json") {
+ val df = Seq(Tuple1(Tuple1(1))).toDF("a")
+
+ checkAnswer(
+ df.select(to_json($"a")),
+ Row("""{"_1":1}""") :: Nil)
+ }
+
+ test("to_json unsupported type") {
+ val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
+ .select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
+ val e = intercept[AnalysisException]{
+ // Unsupported type throws an exception
+ df.select(to_json($"c")).collect()
+ }
+ assert(e.getMessage.contains(
+ "Unable to convert column a of type calendarinterval to JSON."))
+ }
}
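As a footnote to the data-source motivation in the commit message, a hedged workaround sketch for sinks that cannot store nested structs. This is not part of the patch; the output path and the `spark` session are illustrative:

```scala
import org.apache.spark.sql.functions.to_json
import spark.implicits._

// CSV cannot represent a nested struct column directly, so serialize
// the struct to a JSON string before writing.
val nested = Seq(Tuple1(Tuple1(1))).toDF("a")
nested
  .select(to_json($"a").as("a_json"))
  .write
  .csv("/tmp/nested-as-json") // illustrative path
```

Reading the data back, `from_json` with the matching schema recovers the original struct column.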