diff options
author | gatorsmile <gatorsmile@gmail.com> | 2015-11-27 22:44:08 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-11-27 22:44:08 -0800 |
commit | 149cd692ee2e127d79386fd8e584f4f70a2906ba (patch) | |
tree | a45cf5193210c41be7e441b0c9543c3c40c5bd8f /sql | |
parent | b9921524d970f9413039967c1f17ae2e736982f0 (diff) | |
download | spark-149cd692ee2e127d79386fd8e584f4f70a2906ba.tar.gz spark-149cd692ee2e127d79386fd8e584f4f70a2906ba.tar.bz2 spark-149cd692ee2e127d79386fd8e584f4f70a2906ba.zip |
[SPARK-12028] [SQL] get_json_object returns an incorrect result when the value is null literals
When calling `get_json_object` for the following two cases, both results are `"null"`:
```scala
val tuple: Seq[(String, String)] = ("5", """{"f1": null}""") :: Nil
val df: DataFrame = tuple.toDF("key", "jstring")
val res = df.select(functions.get_json_object($"jstring", "$.f1")).collect()
```
```scala
val tuple2: Seq[(String, String)] = ("5", """{"f1": "null"}""") :: Nil
val df2: DataFrame = tuple2.toDF("key", "jstring")
val res3 = df2.select(functions.get_json_object($"jstring", "$.f1")).collect()
```
Fixed the problem and also added a test case.
Author: gatorsmile <gatorsmile@gmail.com>
Closes #10018 from gatorsmile/get_json_object.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala | 7 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala | 20 |
2 files changed, 25 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 8cd73236a7..4991b9cb54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -298,8 +298,11 @@ case class GetJsonObject(json: Expression, path: Expression) case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name => // exact field match - p.nextToken() - evaluatePath(p, g, style, xs) + if (p.nextToken() != JsonToken.VALUE_NULL) { + evaluatePath(p, g, style, xs) + } else { + false + } case (FIELD_NAME, Wildcard :: xs) => // wildcard field match diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 14fd56fc8c..1f384edf32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -39,6 +39,26 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { ("6", "[invalid JSON string]") :: Nil + test("function get_json_object - null") { + val df: DataFrame = tuples.toDF("key", "jstring") + val expected = + Row("1", "value1", "value2", "3", null, "5.23") :: + Row("2", "value12", "2", "value3", "4.01", null) :: + Row("3", "value13", "2", "value33", "value44", "5.01") :: + Row("4", null, null, null, null, null) :: + Row("5", "", null, null, null, null) :: + Row("6", null, null, null, null, null) :: + Nil + + checkAnswer( + df.select($"key", functions.get_json_object($"jstring", "$.f1"), + functions.get_json_object($"jstring", "$.f2"), + functions.get_json_object($"jstring", "$.f3"), + functions.get_json_object($"jstring", "$.f4"), + functions.get_json_object($"jstring", "$.f5")), + expected) + } + test("json_tuple select") { val df: DataFrame = tuples.toDF("key", "jstring") val expected = |