author    Takeshi Yamamuro <yamamuro@apache.org>  2017-03-17 14:51:59 -0700
committer Xiao Li <gatorsmile@gmail.com>          2017-03-17 14:51:59 -0700
commit    7de66bae58733595cb88ec899640f7acf734d5c4 (patch)
tree      eb35312cd6934fe1d27e114df4d6f373f708d737 /sql/core
parent    bfdeea5c68f963ce60d48d0aa4a4c8c582169950 (diff)
[SPARK-19967][SQL] Add from_json in FunctionRegistry
## What changes were proposed in this pull request?

This PR adds entries in `FunctionRegistry` so that `from_json` can be called from SQL.

## How was this patch tested?

Added tests in `JsonFunctionsSuite` and `SQLQueryTestSuite`.

Author: Takeshi Yamamuro <yamamuro@apache.org>

Closes #17320 from maropu/SPARK-19967.
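As a minimal sketch of the SQL-side usage these tests exercise (assuming a local `SparkSession` named `spark`; the object and app names are illustrative and not part of the patch):

```scala
import org.apache.spark.sql.SparkSession

object FromJsonSqlExample {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; configuration is not part of this change.
    val spark = SparkSession.builder()
      .appName("from_json-sql-example")
      .master("local[*]")
      .getOrCreate()

    // With this change, from_json resolves through FunctionRegistry and can be
    // called directly from SQL. The schema is a DDL-formatted string; options
    // must be passed via map() with string keys and values, as the new tests assert.
    spark.sql("""SELECT from_json('{"a":1}', 'a INT')""").show()
    spark.sql(
      """SELECT from_json('{"time":"26/08/2015"}', 'time Timestamp',
        |  map('timestampFormat', 'dd/MM/yyyy'))""".stripMargin).show()

    spark.stop()
  }
}
```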
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/test/resources/sql-tests/inputs/json-functions.sql       |  13
-rw-r--r--  sql/core/src/test/resources/sql-tests/results/json-functions.sql.out  | 107
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala |  36
3 files changed, 154 insertions, 2 deletions
diff --git a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
index 9308560451..83243c5e5a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/json-functions.sql
@@ -5,4 +5,17 @@ select to_json(named_struct('a', 1, 'b', 2));
select to_json(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy'));
-- Check if errors handled
select to_json(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE'));
+select to_json(named_struct('a', 1, 'b', 2), map('mode', 1));
select to_json();
+
+-- from_json
+describe function from_json;
+describe function extended from_json;
+select from_json('{"a":1}', 'a INT');
+select from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
+-- Check if errors handled
+select from_json('{"a":1}', 1);
+select from_json('{"a":1}', 'a InvalidType');
+select from_json('{"a":1}', 'a INT', named_struct('mode', 'PERMISSIVE'));
+select from_json('{"a":1}', 'a INT', map('mode', 1));
+select from_json();
diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
index d8aa4fb9fa..b57cbbc1d8 100644
--- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 6
+-- Number of queries: 16
-- !query 0
@@ -55,9 +55,112 @@ Must use a map() function for options;; line 1 pos 7
-- !query 5
-select to_json()
+select to_json(named_struct('a', 1, 'b', 2), map('mode', 1))
-- !query 5 schema
struct<>
-- !query 5 output
org.apache.spark.sql.AnalysisException
+A type of keys and values in map() must be string, but got MapType(StringType,IntegerType,false);; line 1 pos 7
+
+
+-- !query 6
+select to_json()
+-- !query 6 schema
+struct<>
+-- !query 6 output
+org.apache.spark.sql.AnalysisException
Invalid number of arguments for function to_json; line 1 pos 7
+
+
+-- !query 7
+describe function from_json
+-- !query 7 schema
+struct<function_desc:string>
+-- !query 7 output
+Class: org.apache.spark.sql.catalyst.expressions.JsonToStruct
+Function: from_json
+Usage: from_json(jsonStr, schema[, options]) - Returns a struct value with the given `jsonStr` and `schema`.
+
+
+-- !query 8
+describe function extended from_json
+-- !query 8 schema
+struct<function_desc:string>
+-- !query 8 output
+Class: org.apache.spark.sql.catalyst.expressions.JsonToStruct
+Extended Usage:
+ Examples:
+ > SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE');
+ {"a":1, "b":0.8}
+ > SELECT from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
+ {"time":"2015-08-26 00:00:00.0"}
+
+Function: from_json
+Usage: from_json(jsonStr, schema[, options]) - Returns a struct value with the given `jsonStr` and `schema`.
+
+
+-- !query 9
+select from_json('{"a":1}', 'a INT')
+-- !query 9 schema
+struct<jsontostruct({"a":1}):struct<a:int>>
+-- !query 9 output
+{"a":1}
+
+
+-- !query 10
+select from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'))
+-- !query 10 schema
+struct<jsontostruct({"time":"26/08/2015"}):struct<time:timestamp>>
+-- !query 10 output
+{"time":2015-08-26 00:00:00.0}
+
+
+-- !query 11
+select from_json('{"a":1}', 1)
+-- !query 11 schema
+struct<>
+-- !query 11 output
+org.apache.spark.sql.AnalysisException
+Expected a string literal instead of 1;; line 1 pos 7
+
+
+-- !query 12
+select from_json('{"a":1}', 'a InvalidType')
+-- !query 12 schema
+struct<>
+-- !query 12 output
+org.apache.spark.sql.AnalysisException
+
+DataType invalidtype() is not supported.(line 1, pos 2)
+
+== SQL ==
+a InvalidType
+--^^^
+; line 1 pos 7
+
+
+-- !query 13
+select from_json('{"a":1}', 'a INT', named_struct('mode', 'PERMISSIVE'))
+-- !query 13 schema
+struct<>
+-- !query 13 output
+org.apache.spark.sql.AnalysisException
+Must use a map() function for options;; line 1 pos 7
+
+
+-- !query 14
+select from_json('{"a":1}', 'a INT', map('mode', 1))
+-- !query 14 schema
+struct<>
+-- !query 14 output
+org.apache.spark.sql.AnalysisException
+A type of keys and values in map() must be string, but got MapType(StringType,IntegerType,false);; line 1 pos 7
+
+
+-- !query 15
+select from_json()
+-- !query 15 schema
+struct<>
+-- !query 15 output
+org.apache.spark.sql.AnalysisException
+Invalid number of arguments for function from_json; line 1 pos 7
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index cdea3b9a0f..2345b82081 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -220,4 +220,40 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
assert(errMsg2.getMessage.startsWith(
"A type of keys and values in map() must be string, but got"))
}
+
+ test("SPARK-19967 Support from_json in SQL") {
+ val df1 = Seq("""{"a": 1}""").toDS()
+ checkAnswer(
+ df1.selectExpr("from_json(value, 'a INT')"),
+ Row(Row(1)) :: Nil)
+
+ val df2 = Seq("""{"c0": "a", "c1": 1, "c2": {"c20": 3.8, "c21": 8}}""").toDS()
+ checkAnswer(
+ df2.selectExpr("from_json(value, 'c0 STRING, c1 INT, c2 STRUCT<c20: DOUBLE, c21: INT>')"),
+ Row(Row("a", 1, Row(3.8, 8))) :: Nil)
+
+ val df3 = Seq("""{"time": "26/08/2015 18:00"}""").toDS()
+ checkAnswer(
+ df3.selectExpr(
+ "from_json(value, 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy HH:mm'))"),
+ Row(Row(java.sql.Timestamp.valueOf("2015-08-26 18:00:00.0"))))
+
+ val errMsg1 = intercept[AnalysisException] {
+ df3.selectExpr("from_json(value, 1)")
+ }
+ assert(errMsg1.getMessage.startsWith("Expected a string literal instead of"))
+ val errMsg2 = intercept[AnalysisException] {
+ df3.selectExpr("""from_json(value, 'time InvalidType')""")
+ }
+ assert(errMsg2.getMessage.contains("DataType invalidtype() is not supported"))
+ val errMsg3 = intercept[AnalysisException] {
+ df3.selectExpr("from_json(value, 'time Timestamp', named_struct('a', 1))")
+ }
+ assert(errMsg3.getMessage.startsWith("Must use a map() function for options"))
+ val errMsg4 = intercept[AnalysisException] {
+ df3.selectExpr("from_json(value, 'time Timestamp', map('a', 1))")
+ }
+ assert(errMsg4.getMessage.startsWith(
+ "A type of keys and values in map() must be string, but got"))
+ }
}
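
For comparison with the `selectExpr` calls in the suite above, here is a hedged sketch of the equivalent call through the typed `org.apache.spark.sql.functions.from_json` API, which the new SQL entry mirrors (session and object names are illustrative, not part of this patch):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.from_json
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

object FromJsonDataFrameExample {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; not part of this change.
    val spark = SparkSession.builder()
      .appName("from_json-dataframe-example")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Same parsing as selectExpr("from_json(value, 'a INT')") in the suite,
    // but with an explicitly constructed StructType instead of a DDL string.
    val schema = StructType(StructField("a", IntegerType) :: Nil)
    val parsed = Seq("""{"a": 1}""").toDS()
      .select(from_json($"value", schema).as("parsed"))
    parsed.show()

    spark.stop()
  }
}
```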