aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-08-04 09:07:09 -0700
committerDavies Liu <davies.liu@gmail.com>2015-08-04 09:07:09 -0700
commit73dedb589d06f7c7a525cc4f07721a77f480c434 (patch)
treed0fb2518db596194ff7d8de09b20ae239eb53a66 /sql
parentb1f88a38d53aebe7cabb762cdd2f1cc64726b0b4 (diff)
downloadspark-73dedb589d06f7c7a525cc4f07721a77f480c434.tar.gz
spark-73dedb589d06f7c7a525cc4f07721a77f480c434.tar.bz2
spark-73dedb589d06f7c7a525cc4f07721a77f480c434.zip
[SPARK-8246] [SQL] Implement get_json_object
This is based on #7485 , thanks to NathanHowell Tests were copied from Hive, but do not seem to be super comprehensive. I've generally replicated Hive's unusual behavior rather than following a JSONPath reference, except for one case (as noted in the comments). I don't know if there is a way of fully replicating Hive's behavior without a slower TreeNode implementation, so I've erred on the side of performance instead. Author: Davies Liu <davies@databricks.com> Author: Yin Huai <yhuai@databricks.com> Author: Nathan Howell <nhowell@godaddy.com> Closes #7901 from davies/get_json_object and squashes the following commits: 3ace9b9 [Davies Liu] Merge branch 'get_json_object' of github.com:davies/spark into get_json_object 98766fc [Davies Liu] Merge branch 'master' of github.com:apache/spark into get_json_object a7dc6d0 [Davies Liu] Update JsonExpressionsSuite.scala c818519 [Yin Huai] new results. 18ce26b [Davies Liu] fix tests 6ac29fb [Yin Huai] Golden files. 25eebef [Davies Liu] use HiveQuerySuite e0ac6ec [Yin Huai] Golden answer files. 940c060 [Davies Liu] tweat code style 44084c5 [Davies Liu] Merge branch 'master' of github.com:apache/spark into get_json_object 9192d09 [Nathan Howell] Match Hive’s behavior for unwrapping arrays of one element 8dab647 [Nathan Howell] [SPARK-8246] [SQL] Implement get_json_object
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala1
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala309
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala202
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala32
-rw-r--r--sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala3
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala6
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f1
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #10-0-f3f47d06d7c51d493d68112b0bd6c1fc1
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #2-0-e84c2f8136919830fd665a278e4158a1
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #3-0-bf140c65c31f8d892ec23e41e16e58bb1
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #4-0-f0bd902edc1990c9a6c65a6bb672c4d51
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #5-0-3c09f4316a1533049aee8af749cdcab1
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da1
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce01
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #8-0-180b4b6fdb26011fec05a7ca99fd98441
-rw-r--r--sql/hive/src/test/resources/golden/get_json_object #9-0-47c451a969d856f008f4d6b3d378d94b1
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala51
17 files changed, 613 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 6140d1b129..43e3e9b910 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -179,6 +179,7 @@ object FunctionRegistry {
expression[Decode]("decode"),
expression[FindInSet]("find_in_set"),
expression[FormatNumber]("format_number"),
+ expression[GetJsonObject]("get_json_object"),
expression[InitCap]("initcap"),
expression[Lower]("lcase"),
expression[Lower]("lower"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala
new file mode 100644
index 0000000000..23bfa18c94
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import java.io.{StringWriter, ByteArrayOutputStream}
+
+import com.fasterxml.jackson.core._
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{StringType, DataType}
+import org.apache.spark.unsafe.types.UTF8String
+
+import scala.util.parsing.combinator.RegexParsers
+
+private[this] sealed trait PathInstruction
+private[this] object PathInstruction {
+ private[expressions] case object Subscript extends PathInstruction
+ private[expressions] case object Wildcard extends PathInstruction
+ private[expressions] case object Key extends PathInstruction
+ private[expressions] case class Index(index: Long) extends PathInstruction
+ private[expressions] case class Named(name: String) extends PathInstruction
+}
+
+private[this] sealed trait WriteStyle
+private[this] object WriteStyle {
+ private[expressions] case object RawStyle extends WriteStyle
+ private[expressions] case object QuotedStyle extends WriteStyle
+ private[expressions] case object FlattenStyle extends WriteStyle
+}
+
+private[this] object JsonPathParser extends RegexParsers {
+ import PathInstruction._
+
+ def root: Parser[Char] = '$'
+
+ def long: Parser[Long] = "\\d+".r ^? {
+ case x => x.toLong
+ }
+
+ // parse `[*]` and `[123]` subscripts
+ def subscript: Parser[List[PathInstruction]] =
+ for {
+ operand <- '[' ~> ('*' ^^^ Wildcard | long ^^ Index) <~ ']'
+ } yield {
+ Subscript :: operand :: Nil
+ }
+
+ // parse `.name` or `['name']` child expressions
+ def named: Parser[List[PathInstruction]] =
+ for {
+ name <- '.' ~> "[^\\.\\[]+".r | "[\\'" ~> "[^\\'\\?]+" <~ "\\']"
+ } yield {
+ Key :: Named(name) :: Nil
+ }
+
+ // child wildcards: `..`, `.*` or `['*']`
+ def wildcard: Parser[List[PathInstruction]] =
+ (".*" | "['*']") ^^^ List(Wildcard)
+
+ def node: Parser[List[PathInstruction]] =
+ wildcard |
+ named |
+ subscript
+
+ val expression: Parser[List[PathInstruction]] = {
+ phrase(root ~> rep(node) ^^ (x => x.flatten))
+ }
+
+ def parse(str: String): Option[List[PathInstruction]] = {
+ this.parseAll(expression, str) match {
+ case Success(result, _) =>
+ Some(result)
+
+ case NoSuccess(msg, next) =>
+ None
+ }
+ }
+}
+
+private[this] object GetJsonObject {
+ private val jsonFactory = new JsonFactory()
+
+ // Enabled for Hive compatibility
+ jsonFactory.enable(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS)
+}
+
+/**
+ * Extracts json object from a json string based on json path specified, and returns json string
+ * of the extracted json object. It will return null if the input json string is invalid.
+ */
+case class GetJsonObject(json: Expression, path: Expression)
+ extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
+
+ import GetJsonObject._
+ import PathInstruction._
+ import WriteStyle._
+ import com.fasterxml.jackson.core.JsonToken._
+
+ override def left: Expression = json
+ override def right: Expression = path
+ override def inputTypes: Seq[DataType] = Seq(StringType, StringType)
+ override def dataType: DataType = StringType
+ override def prettyName: String = "get_json_object"
+
+ @transient private lazy val parsedPath = parsePath(path.eval().asInstanceOf[UTF8String])
+
+ override def eval(input: InternalRow): Any = {
+ val jsonStr = json.eval(input).asInstanceOf[UTF8String]
+ if (jsonStr == null) {
+ return null
+ }
+
+ val parsed = if (path.foldable) {
+ parsedPath
+ } else {
+ parsePath(path.eval(input).asInstanceOf[UTF8String])
+ }
+
+ if (parsed.isDefined) {
+ try {
+ val parser = jsonFactory.createParser(jsonStr.getBytes)
+ val output = new ByteArrayOutputStream()
+ val generator = jsonFactory.createGenerator(output, JsonEncoding.UTF8)
+ parser.nextToken()
+ val matched = evaluatePath(parser, generator, RawStyle, parsed.get)
+ generator.close()
+ if (matched) {
+ UTF8String.fromBytes(output.toByteArray)
+ } else {
+ null
+ }
+ } catch {
+ case _: JsonProcessingException => null
+ }
+ } else {
+ null
+ }
+ }
+
+ private def parsePath(path: UTF8String): Option[List[PathInstruction]] = {
+ if (path != null) {
+ JsonPathParser.parse(path.toString)
+ } else {
+ None
+ }
+ }
+
+ // advance to the desired array index, assumes to start at the START_ARRAY token
+ private def arrayIndex(p: JsonParser, f: () => Boolean): Long => Boolean = {
+ case _ if p.getCurrentToken == END_ARRAY =>
+ // terminate, nothing has been written
+ false
+
+ case 0 =>
+ // we've reached the desired index
+ val dirty = f()
+
+ while (p.nextToken() != END_ARRAY) {
+ // advance the token stream to the end of the array
+ p.skipChildren()
+ }
+
+ dirty
+
+ case i if i > 0 =>
+ // skip this token and evaluate the next
+ p.skipChildren()
+ p.nextToken()
+ arrayIndex(p, f)(i - 1)
+ }
+
+ /**
+ * Evaluate a list of JsonPath instructions, returning a bool that indicates if any leaf nodes
+ * have been written to the generator
+ */
+ private def evaluatePath(
+ p: JsonParser,
+ g: JsonGenerator,
+ style: WriteStyle,
+ path: List[PathInstruction]): Boolean = {
+ (p.getCurrentToken, path) match {
+ case (VALUE_STRING, Nil) if style == RawStyle =>
+ // there is no array wildcard or slice parent, emit this string without quotes
+ if (p.hasTextCharacters) {
+ g.writeRaw(p.getTextCharacters, p.getTextOffset, p.getTextLength)
+ } else {
+ g.writeRaw(p.getText)
+ }
+ true
+
+ case (START_ARRAY, Nil) if style == FlattenStyle =>
+ // flatten this array into the parent
+ var dirty = false
+ while (p.nextToken() != END_ARRAY) {
+ dirty |= evaluatePath(p, g, style, Nil)
+ }
+ dirty
+
+ case (_, Nil) =>
+ // general case: just copy the child tree verbatim
+ g.copyCurrentStructure(p)
+ true
+
+ case (START_OBJECT, Key :: xs) =>
+ var dirty = false
+ while (p.nextToken() != END_OBJECT) {
+ if (dirty) {
+ // once a match has been found we can skip other fields
+ p.skipChildren()
+ } else {
+ dirty = evaluatePath(p, g, style, xs)
+ }
+ }
+ dirty
+
+ case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs) =>
+ // special handling for the non-structure preserving double wildcard behavior in Hive
+ var dirty = false
+ g.writeStartArray()
+ while (p.nextToken() != END_ARRAY) {
+ dirty |= evaluatePath(p, g, FlattenStyle, xs)
+ }
+ g.writeEndArray()
+ dirty
+
+ case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle =>
+ // retain Flatten, otherwise use Quoted... cannot use Raw within an array
+ val nextStyle = style match {
+ case RawStyle => QuotedStyle
+ case FlattenStyle => FlattenStyle
+ case QuotedStyle => throw new IllegalStateException()
+ }
+
+ // temporarily buffer child matches, the emitted json will need to be
+ // modified slightly if there is only a single element written
+ val buffer = new StringWriter()
+ val flattenGenerator = jsonFactory.createGenerator(buffer)
+ flattenGenerator.writeStartArray()
+
+ var dirty = 0
+ while (p.nextToken() != END_ARRAY) {
+ // track the number of array elements and only emit an outer array if
+ // we've written more than one element, this matches Hive's behavior
+ dirty += (if (evaluatePath(p, flattenGenerator, nextStyle, xs)) 1 else 0)
+ }
+ flattenGenerator.writeEndArray()
+ flattenGenerator.close()
+
+ val buf = buffer.getBuffer
+ if (dirty > 1) {
+ g.writeRawValue(buf.toString)
+ } else if (dirty == 1) {
+ // remove outer array tokens
+ g.writeRawValue(buf.substring(1, buf.length()-1))
+ } // else do not write anything
+
+ dirty > 0
+
+ case (START_ARRAY, Subscript :: Wildcard :: xs) =>
+ var dirty = false
+ g.writeStartArray()
+ while (p.nextToken() != END_ARRAY) {
+ // wildcards can have multiple matches, continually update the dirty count
+ dirty |= evaluatePath(p, g, QuotedStyle, xs)
+ }
+ g.writeEndArray()
+
+ dirty
+
+ case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) =>
+ p.nextToken()
+ // we're going to have 1 or more results, switch to QuotedStyle
+ arrayIndex(p, () => evaluatePath(p, g, QuotedStyle, xs))(idx)
+
+ case (START_ARRAY, Subscript :: Index(idx) :: xs) =>
+ p.nextToken()
+ arrayIndex(p, () => evaluatePath(p, g, style, xs))(idx)
+
+ case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name =>
+ // exact field match
+ p.nextToken()
+ evaluatePath(p, g, style, xs)
+
+ case (FIELD_NAME, Wildcard :: xs) =>
+ // wildcard field match
+ p.nextToken()
+ evaluatePath(p, g, style, xs)
+
+ case _ =>
+ p.skipChildren()
+ false
+ }
+ }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
new file mode 100644
index 0000000000..4addbaf0cb
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.SparkFunSuite
+
+class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
+ val json =
+ """
+ |{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],
+ |"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees",
+ |"title":"Sayings of the Century","category":"reference","price":8.95},
+ |{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,
+ |"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings",
+ |"category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],
+ |"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},
+ |"email":"amy@only_for_json_udf_test.net","owner":"amy","zip code":"94025",
+ |"fb:testid":"1234"}
+ |""".stripMargin
+
+ test("$.store.bicycle") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.bicycle")),
+ """{"price":19.95,"color":"red"}""")
+ }
+
+ test("$.store.book") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book")),
+ """[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference",
+ |"price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction",
+ |"price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":
+ |"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},
+ |{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
+ |""".stripMargin.replace("\n", ""))
+ }
+
+ test("$.store.book[0]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[0]")),
+ """{"author":"Nigel Rees","title":"Sayings of the Century",
+ |"category":"reference","price":8.95}""".stripMargin.replace("\n", ""))
+ }
+
+ test("$.store.book[*]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[*]")),
+ """[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference",
+ |"price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction",
+ |"price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":
+ |"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},
+ |{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
+ |""".stripMargin.replace("\n", ""))
+ }
+
+ test("$") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$")),
+ json.replace("\n", ""))
+ }
+
+ test("$.store.book[0].category") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[0].category")),
+ "reference")
+ }
+
+ test("$.store.book[*].category") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[*].category")),
+ """["reference","fiction","fiction"]""")
+ }
+
+ test("$.store.book[*].isbn") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[*].isbn")),
+ """["0-553-21311-3","0-395-19395-8"]""")
+ }
+
+ test("$.store.book[*].reader") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[*].reader")),
+ """[{"age":25,"name":"bob"},{"age":26,"name":"jack"}]""")
+ }
+
+ test("$.store.basket[0][1]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[0][1]")),
+ "2")
+ }
+
+ test("$.store.basket[*]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[*]")),
+ """[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]]""")
+ }
+
+ test("$.store.basket[*][0]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[*][0]")),
+ "[1,3,5]")
+ }
+
+ test("$.store.basket[0][*]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[0][*]")),
+ """[1,2,{"b":"y","a":"x"}]""")
+ }
+
+ test("$.store.basket[*][*]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[*][*]")),
+ """[1,2,{"b":"y","a":"x"},3,4,5,6]""")
+ }
+
+ test("$.store.basket[0][2].b") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[0][2].b")),
+ "y")
+ }
+
+ test("$.store.basket[0][*].b") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[0][*].b")),
+ """["y"]""")
+ }
+
+ test("$.zip code") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.zip code")),
+ "94025")
+ }
+
+ test("$.fb:testid") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.fb:testid")),
+ "1234")
+ }
+
+ test("preserve newlines") {
+ checkEvaluation(
+ GetJsonObject(Literal("""{"a":"b\nc"}"""), Literal("$.a")),
+ "b\nc")
+ }
+
+ test("escape") {
+ checkEvaluation(
+ GetJsonObject(Literal("""{"a":"b\"c"}"""), Literal("$.a")),
+ "b\"c")
+ }
+
+ test("$.non_exist_key") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.non_exist_key")),
+ null)
+ }
+
+ test("$..no_recursive") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$..no_recursive")),
+ null)
+ }
+
+ test("$.store.book[10]") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[10]")),
+ null)
+ }
+
+ test("$.store.book[0].non_exist_key") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.book[0].non_exist_key")),
+ null)
+ }
+
+ test("$.store.basket[*].non_exist_key") {
+ checkEvaluation(
+ GetJsonObject(Literal(json), Literal("$.store.basket[*].non_exist_key")),
+ null)
+ }
+
+ test("non foldable literal") {
+ checkEvaluation(
+ GetJsonObject(NonFoldableLiteral(json), NonFoldableLiteral("$.fb:testid")),
+ "1234")
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
new file mode 100644
index 0000000000..71c26a6f8d
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+class JsonFunctionsSuite extends QueryTest {
+
+ private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
+ import ctx.implicits._
+
+ test("function get_json_object") {
+ val df: DataFrame = Seq(("""{"name": "alice", "age": 5}""", "")).toDF("a", "b")
+ checkAnswer(
+ df.selectExpr("get_json_object(a, '$.name')", "get_json_object(a, '$.age')"),
+ Row("alice", "5"))
+ }
+
+}
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index c46a4a4b0b..d4fc6c2b6e 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -266,6 +266,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// Hive returns string from UTC formatted timestamp, spark returns timestamp type
"date_udf",
+ // Can't compare the result that have newline in it
+ "udf_get_json_object",
+
// Unlike Hive, we do support log base in (0, 1.0], therefore disable this
"udf7",
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 8d0bf46e8f..167086db5b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -374,7 +374,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
INSERT OVERWRITE TABLE episodes_part PARTITION (doctor_pt=1)
SELECT title, air_date, doctor FROM episodes
""".cmd
- )
+ ),
+ TestTable("src_json",
+ s"""CREATE TABLE src_json (json STRING) STORED AS TEXTFILE
+ """.stripMargin.cmd,
+ s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/json.txt")}' INTO TABLE src_json".cmd)
)
hiveQTestUtilTables.foreach(registerTestTable)
diff --git a/sql/hive/src/test/resources/golden/get_json_object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f b/sql/hive/src/test/resources/golden/get_json_object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f
new file mode 100644
index 0000000000..1dcda4315a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f
@@ -0,0 +1 @@
+{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},"email":"amy@only_for_json_udf_test.net","owner":"amy","zip code":"94025","fb:testid":"1234"}
diff --git a/sql/hive/src/test/resources/golden/get_json_object #10-0-f3f47d06d7c51d493d68112b0bd6c1fc b/sql/hive/src/test/resources/golden/get_json_object #10-0-f3f47d06d7c51d493d68112b0bd6c1fc
new file mode 100644
index 0000000000..81c545efeb
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #10-0-f3f47d06d7c51d493d68112b0bd6c1fc
@@ -0,0 +1 @@
+1234
diff --git a/sql/hive/src/test/resources/golden/get_json_object #2-0-e84c2f8136919830fd665a278e4158a b/sql/hive/src/test/resources/golden/get_json_object #2-0-e84c2f8136919830fd665a278e4158a
new file mode 100644
index 0000000000..99127db9e3
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #2-0-e84c2f8136919830fd665a278e4158a
@@ -0,0 +1 @@
+amy {"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}}
diff --git a/sql/hive/src/test/resources/golden/get_json_object #3-0-bf140c65c31f8d892ec23e41e16e58bb b/sql/hive/src/test/resources/golden/get_json_object #3-0-bf140c65c31f8d892ec23e41e16e58bb
new file mode 100644
index 0000000000..0bc0399829
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #3-0-bf140c65c31f8d892ec23e41e16e58bb
@@ -0,0 +1 @@
+{"price":19.95,"color":"red"} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
diff --git a/sql/hive/src/test/resources/golden/get_json_object #4-0-f0bd902edc1990c9a6c65a6bb672c4d5 b/sql/hive/src/test/resources/golden/get_json_object #4-0-f0bd902edc1990c9a6c65a6bb672c4d5
new file mode 100644
index 0000000000..4f7e09bd3f
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #4-0-f0bd902edc1990c9a6c65a6bb672c4d5
@@ -0,0 +1 @@
+{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
diff --git a/sql/hive/src/test/resources/golden/get_json_object #5-0-3c09f4316a1533049aee8af749cdcab b/sql/hive/src/test/resources/golden/get_json_object #5-0-3c09f4316a1533049aee8af749cdcab
new file mode 100644
index 0000000000..b2d212a597
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #5-0-3c09f4316a1533049aee8af749cdcab
@@ -0,0 +1 @@
+reference ["reference","fiction","fiction"] ["0-553-21311-3","0-395-19395-8"] [{"age":25,"name":"bob"},{"age":26,"name":"jack"}]
diff --git a/sql/hive/src/test/resources/golden/get_json_object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da b/sql/hive/src/test/resources/golden/get_json_object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da
new file mode 100644
index 0000000000..21d88629fc
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da
@@ -0,0 +1 @@
+25 [25,26]
diff --git a/sql/hive/src/test/resources/golden/get_json_object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0 b/sql/hive/src/test/resources/golden/get_json_object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0
new file mode 100644
index 0000000000..e60721e1dd
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0
@@ -0,0 +1 @@
+2 [[1,2,{"b":"y","a":"x"}],[3,4],[5,6]] 1 [1,2,{"b":"y","a":"x"}] [1,2,{"b":"y","a":"x"},3,4,5,6] y ["y"]
diff --git a/sql/hive/src/test/resources/golden/get_json_object #8-0-180b4b6fdb26011fec05a7ca99fd9844 b/sql/hive/src/test/resources/golden/get_json_object #8-0-180b4b6fdb26011fec05a7ca99fd9844
new file mode 100644
index 0000000000..356fcdf713
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #8-0-180b4b6fdb26011fec05a7ca99fd9844
@@ -0,0 +1 @@
+NULL NULL NULL NULL NULL NULL
diff --git a/sql/hive/src/test/resources/golden/get_json_object #9-0-47c451a969d856f008f4d6b3d378d94b b/sql/hive/src/test/resources/golden/get_json_object #9-0-47c451a969d856f008f4d6b3d378d94b
new file mode 100644
index 0000000000..ef4a39675e
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/get_json_object #9-0-47c451a969d856f008f4d6b3d378d94b
@@ -0,0 +1 @@
+94025
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index a7cfac51cc..edb2755367 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -587,6 +587,57 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|select * where key = 4
""".stripMargin)
+ // test get_json_object again Hive, because the HiveCompatabilitySuite cannot handle result
+ // with newline in it.
+ createQueryTest("get_json_object #1",
+ "SELECT get_json_object(src_json.json, '$') FROM src_json")
+
+ createQueryTest("get_json_object #2",
+ "SELECT get_json_object(src_json.json, '$.owner'), get_json_object(src_json.json, '$.store')" +
+ " FROM src_json")
+
+ createQueryTest("get_json_object #3",
+ "SELECT get_json_object(src_json.json, '$.store.bicycle'), " +
+ "get_json_object(src_json.json, '$.store.book') FROM src_json")
+
+ createQueryTest("get_json_object #4",
+ "SELECT get_json_object(src_json.json, '$.store.book[0]'), " +
+ "get_json_object(src_json.json, '$.store.book[*]') FROM src_json")
+
+ createQueryTest("get_json_object #5",
+ "SELECT get_json_object(src_json.json, '$.store.book[0].category'), " +
+ "get_json_object(src_json.json, '$.store.book[*].category'), " +
+ "get_json_object(src_json.json, '$.store.book[*].isbn'), " +
+ "get_json_object(src_json.json, '$.store.book[*].reader') FROM src_json")
+
+ createQueryTest("get_json_object #6",
+ "SELECT get_json_object(src_json.json, '$.store.book[*].reader[0].age'), " +
+ "get_json_object(src_json.json, '$.store.book[*].reader[*].age') FROM src_json")
+
+ createQueryTest("get_json_object #7",
+ "SELECT get_json_object(src_json.json, '$.store.basket[0][1]'), " +
+ "get_json_object(src_json.json, '$.store.basket[*]'), " +
+ // Hive returns wrong result with [*][0], so this expression is change to make test pass
+ "get_json_object(src_json.json, '$.store.basket[0][0]'), " +
+ "get_json_object(src_json.json, '$.store.basket[0][*]'), " +
+ "get_json_object(src_json.json, '$.store.basket[*][*]'), " +
+ "get_json_object(src_json.json, '$.store.basket[0][2].b'), " +
+ "get_json_object(src_json.json, '$.store.basket[0][*].b') FROM src_json")
+
+ createQueryTest("get_json_object #8",
+ "SELECT get_json_object(src_json.json, '$.non_exist_key'), " +
+ "get_json_object(src_json.json, '$..no_recursive'), " +
+ "get_json_object(src_json.json, '$.store.book[10]'), " +
+ "get_json_object(src_json.json, '$.store.book[0].non_exist_key'), " +
+ "get_json_object(src_json.json, '$.store.basket[*].non_exist_key'), " +
+ "get_json_object(src_json.json, '$.store.basket[0][*].non_exist_key') FROM src_json")
+
+ createQueryTest("get_json_object #9",
+ "SELECT get_json_object(src_json.json, '$.zip code') FROM src_json")
+
+ createQueryTest("get_json_object #10",
+ "SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json")
+
test("predicates contains an empty AttributeSet() references") {
sql(
"""