aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author gatorsmile <gatorsmile@gmail.com> 2015-11-27 22:44:08 -0800
committer Davies Liu <davies.liu@gmail.com> 2015-11-27 22:44:08 -0800
commit 149cd692ee2e127d79386fd8e584f4f70a2906ba (patch)
tree a45cf5193210c41be7e441b0c9543c3c40c5bd8f
parent b9921524d970f9413039967c1f17ae2e736982f0 (diff)
download spark-149cd692ee2e127d79386fd8e584f4f70a2906ba.tar.gz
spark-149cd692ee2e127d79386fd8e584f4f70a2906ba.tar.bz2
spark-149cd692ee2e127d79386fd8e584f4f70a2906ba.zip
[SPARK-12028] [SQL] get_json_object returns an incorrect result when the value is a null literal
When calling `get_json_object` for the following two cases, both results are `"null"`:

```scala
val tuple: Seq[(String, String)] = ("5", """{"f1": null}""") :: Nil
val df: DataFrame = tuple.toDF("key", "jstring")
val res = df.select(functions.get_json_object($"jstring", "$.f1")).collect()
```

```scala
val tuple2: Seq[(String, String)] = ("5", """{"f1": "null"}""") :: Nil
val df2: DataFrame = tuple2.toDF("key", "jstring")
val res3 = df2.select(functions.get_json_object($"jstring", "$.f1")).collect()
```

Fixed the problem and also added a test case. With this change, a field whose value is the JSON `null` literal is treated as not matched, so the first case no longer yields the string `"null"`.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #10018 from gatorsmile/get_json_object.
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala | 7
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala | 20
2 files changed, 25 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
index 8cd73236a7..4991b9cb54 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
@@ -298,8 +298,11 @@ case class GetJsonObject(json: Expression, path: Expression)
case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name =>
// exact field match
- p.nextToken()
- evaluatePath(p, g, style, xs)
+ if (p.nextToken() != JsonToken.VALUE_NULL) {
+ evaluatePath(p, g, style, xs)
+ } else {
+ false
+ }
case (FIELD_NAME, Wildcard :: xs) =>
// wildcard field match
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 14fd56fc8c..1f384edf32 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -39,6 +39,26 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
("6", "[invalid JSON string]") ::
Nil
+ test("function get_json_object - null") {
+ val df: DataFrame = tuples.toDF("key", "jstring")
+ val expected =
+ Row("1", "value1", "value2", "3", null, "5.23") ::
+ Row("2", "value12", "2", "value3", "4.01", null) ::
+ Row("3", "value13", "2", "value33", "value44", "5.01") ::
+ Row("4", null, null, null, null, null) ::
+ Row("5", "", null, null, null, null) ::
+ Row("6", null, null, null, null, null) ::
+ Nil
+
+ checkAnswer(
+ df.select($"key", functions.get_json_object($"jstring", "$.f1"),
+ functions.get_json_object($"jstring", "$.f2"),
+ functions.get_json_object($"jstring", "$.f3"),
+ functions.get_json_object($"jstring", "$.f4"),
+ functions.get_json_object($"jstring", "$.f5")),
+ expected)
+ }
+
test("json_tuple select") {
val df: DataFrame = tuples.toDF("key", "jstring")
val expected =