aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src/main/scala
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2017-03-05 14:35:06 -0800
committerBurak Yavuz <brkyvz@gmail.com>2017-03-05 14:35:06 -0800
commit369a148e591bb16ec7da54867610b207602cd698 (patch)
treee7c2469ce548557bef43d3ccdb0fee6d5c006ec5 /sql/core/src/main/scala
parent80d5338b32e856870cf187ce17bc87335d690761 (diff)
downloadspark-369a148e591bb16ec7da54867610b207602cd698.tar.gz
spark-369a148e591bb16ec7da54867610b207602cd698.tar.bz2
spark-369a148e591bb16ec7da54867610b207602cd698.zip
[SPARK-19595][SQL] Support json array in from_json
## What changes were proposed in this pull request? This PR proposes both of the following: **Do not allow json arrays with multiple elements, and return null in `from_json` with `StructType` as the schema.** Currently, it only reads the single row when the input is a json array. So, the code below: ```scala import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ val schema = StructType(StructField("a", IntegerType) :: Nil) Seq(("""[{"a": 1}, {"a": 2}]""")).toDF("struct").select(from_json(col("struct"), schema)).show() ``` prints ``` +--------------------+ |jsontostruct(struct)| +--------------------+ | [1]| +--------------------+ ``` This PR suggests printing this as `null` if the schema is `StructType` and the input is a json array with multiple elements ``` +--------------------+ |jsontostruct(struct)| +--------------------+ | null| +--------------------+ ``` **Support json arrays in `from_json` with `ArrayType` as the schema.** ```scala import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ val schema = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) Seq(("""[{"a": 1}, {"a": 2}]""")).toDF("array").select(from_json(col("array"), schema)).show() ``` prints ``` +-------------------+ |jsontostruct(array)| +-------------------+ | [[1], [2]]| +-------------------+ ``` ## How was this patch tested? Unit tests in `JsonExpressionsSuite` and `JsonFunctionsSuite`, Python doctests, and manual testing. Author: hyukjinkwon <gurwls223@gmail.com> Closes #16929 from HyukjinKwon/disallow-array.
Diffstat (limited to 'sql/core/src/main/scala')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala52
1 files changed, 47 insertions, 5 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 2247010ac3..201f726db3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2973,7 +2973,22 @@ object functions {
* @group collection_funcs
* @since 2.1.0
*/
- def from_json(e: Column, schema: StructType, options: Map[String, String]): Column = withExpr {
+ def from_json(e: Column, schema: StructType, options: Map[String, String]): Column =
+ from_json(e, schema.asInstanceOf[DataType], options)
+
+ /**
+ * (Scala-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType`
+ * with the specified schema. Returns `null`, in the case of an unparseable string.
+ *
+ * @param e a string column containing JSON data.
+ * @param schema the schema to use when parsing the json string
+ * @param options options to control how the json is parsed. accepts the same options and the
+ * json data source.
+ *
+ * @group collection_funcs
+ * @since 2.2.0
+ */
+ def from_json(e: Column, schema: DataType, options: Map[String, String]): Column = withExpr {
JsonToStruct(schema, options, e.expr)
}
@@ -2993,6 +3008,21 @@ object functions {
from_json(e, schema, options.asScala.toMap)
/**
+ * (Java-specific) Parses a column containing a JSON string into a `StructType` or `ArrayType`
+ * with the specified schema. Returns `null`, in the case of an unparseable string.
+ *
+ * @param e a string column containing JSON data.
+ * @param schema the schema to use when parsing the json string
+ * @param options options to control how the json is parsed. accepts the same options and the
+ * json data source.
+ *
+ * @group collection_funcs
+ * @since 2.2.0
+ */
+ def from_json(e: Column, schema: DataType, options: java.util.Map[String, String]): Column =
+ from_json(e, schema, options.asScala.toMap)
+
+ /**
* Parses a column containing a JSON string into a `StructType` with the specified schema.
* Returns `null`, in the case of an unparseable string.
*
@@ -3006,8 +3036,21 @@ object functions {
from_json(e, schema, Map.empty[String, String])
/**
- * Parses a column containing a JSON string into a `StructType` with the specified schema.
- * Returns `null`, in the case of an unparseable string.
+ * Parses a column containing a JSON string into a `StructType` or `ArrayType`
+ * with the specified schema. Returns `null`, in the case of an unparseable string.
+ *
+ * @param e a string column containing JSON data.
+ * @param schema the schema to use when parsing the json string
+ *
+ * @group collection_funcs
+ * @since 2.2.0
+ */
+ def from_json(e: Column, schema: DataType): Column =
+ from_json(e, schema, Map.empty[String, String])
+
+ /**
+ * Parses a column containing a JSON string into a `StructType` or `ArrayType`
+ * with the specified schema. Returns `null`, in the case of an unparseable string.
*
* @param e a string column containing JSON data.
* @param schema the schema to use when parsing the json string as a json string
@@ -3016,8 +3059,7 @@ object functions {
* @since 2.1.0
*/
def from_json(e: Column, schema: String, options: java.util.Map[String, String]): Column =
- from_json(e, DataType.fromJson(schema).asInstanceOf[StructType], options)
-
+ from_json(e, DataType.fromJson(schema), options)
/**
* (Scala-specific) Converts a column containing a `StructType` into a JSON string with the