aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src/test
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2017-03-05 14:35:06 -0800
committerBurak Yavuz <brkyvz@gmail.com>2017-03-05 14:35:06 -0800
commit369a148e591bb16ec7da54867610b207602cd698 (patch)
treee7c2469ce548557bef43d3ccdb0fee6d5c006ec5 /sql/core/src/test
parent80d5338b32e856870cf187ce17bc87335d690761 (diff)
downloadspark-369a148e591bb16ec7da54867610b207602cd698.tar.gz
spark-369a148e591bb16ec7da54867610b207602cd698.tar.bz2
spark-369a148e591bb16ec7da54867610b207602cd698.zip
[SPARK-19595][SQL] Support json array in from_json
## What changes were proposed in this pull request? This PR proposes both of the following: **Do not allow json arrays with multiple elements and return null in `from_json` with `StructType` as the schema.** Currently, it only reads the single row when the input is a json array. So, the code below: ```scala import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ val schema = StructType(StructField("a", IntegerType) :: Nil) Seq(("""[{"a": 1}, {"a": 2}]""")).toDF("struct").select(from_json(col("struct"), schema)).show() ``` prints ``` +--------------------+ |jsontostruct(struct)| +--------------------+ | [1]| +--------------------+ ``` This PR simply suggests to print this as `null` if the schema is `StructType` and the input is a json array with multiple elements ``` +--------------------+ |jsontostruct(struct)| +--------------------+ | null| +--------------------+ ``` **Support json arrays in `from_json` with `ArrayType` as the schema.** ```scala import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ val schema = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) Seq(("""[{"a": 1}, {"a": 2}]""")).toDF("array").select(from_json(col("array"), schema)).show() ``` prints ``` +-------------------+ |jsontostruct(array)| +-------------------+ | [[1], [2]]| +-------------------+ ``` ## How was this patch tested? Unit test in `JsonExpressionsSuite`, `JsonFunctionsSuite`, Python doctests and manual test. Author: hyukjinkwon <gurwls223@gmail.com> Closes #16929 from HyukjinKwon/disallow-array.
Diffstat (limited to 'sql/core/src/test')
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala25
1 files changed, 24 insertions, 1 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 9c39b3c7f0..953d161ec2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
import org.apache.spark.sql.functions.{from_json, struct, to_json}
import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{CalendarIntervalType, IntegerType, StructType, TimestampType}
+import org.apache.spark.sql.types._
class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
import testImplicits._
@@ -133,6 +133,29 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
Row(null) :: Nil)
}
+ test("from_json invalid schema") {
+ val df = Seq("""{"a" 1}""").toDS()
+ val schema = ArrayType(StringType)
+ val message = intercept[AnalysisException] {
+ df.select(from_json($"value", schema))
+ }.getMessage
+
+ assert(message.contains(
+ "Input schema array<string> must be a struct or an array of structs."))
+ }
+
+ test("from_json array support") {
+ val df = Seq("""[{"a": 1, "b": "a"}, {"a": 2}, { }]""").toDS()
+ val schema = ArrayType(
+ StructType(
+ StructField("a", IntegerType) ::
+ StructField("b", StringType) :: Nil))
+
+ checkAnswer(
+ df.select(from_json($"value", schema)),
+ Row(Seq(Row(1, "a"), Row(2, null), Row(null, null))))
+ }
+
test("to_json") {
val df = Seq(Tuple1(Tuple1(1))).toDF("a")