about | summary | refs | log | tree | commit | diff
path: root/sql/hive
diff options
context:
space:
mode:
author: Wenchen Fan <wenchen@databricks.com>, 2015-11-10 11:21:31 -0800
committer: Yin Huai <yhuai@databricks.com>, 2015-11-10 11:21:31 -0800
commit: 53600854c270d4c953fe95fbae528740b5cf6603 (patch)
tree: 5bccb351008aff9c6b5182fdf22fbf6f77d69b88 /sql/hive
parent: dfcfcbcc0448ebc6f02eba6bf0495832a321c87e (diff)
download: spark-53600854c270d4c953fe95fbae528740b5cf6603.tar.gz
spark-53600854c270d4c953fe95fbae528740b5cf6603.tar.bz2
spark-53600854c270d4c953fe95fbae528740b5cf6603.zip
[SPARK-11590][SQL] use native json_tuple in lateral view
Author: Wenchen Fan <wenchen@databricks.com>
Closes #9562 from cloud-fan/json-tuple.
Diffstat (limited to 'sql/hive')
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 4
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala | 13
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 31
3 files changed, 48 insertions, 0 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 6f8ed413a0..091caab921 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -1821,6 +1821,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
}
val explode = "(?i)explode".r
+ val jsonTuple = "(?i)json_tuple".r
def nodesToGenerator(nodes: Seq[Node]): (Generator, Seq[String]) = {
val function = nodes.head
@@ -1833,6 +1834,9 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
case Token("TOK_FUNCTION", Token(explode(), Nil) :: child :: Nil) =>
(Explode(nodeToExpr(child)), attributes)
+ case Token("TOK_FUNCTION", Token(jsonTuple(), Nil) :: children) =>
+ (JsonTuple(children.map(nodeToExpr)), attributes)
+
case Token("TOK_FUNCTION", Token(functionName, Nil) :: children) =>
val functionInfo: FunctionInfo =
Option(FunctionRegistry.getFunctionInfo(functionName.toLowerCase)).getOrElse(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
index 528a7398b1..a330362b4e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
@@ -18,6 +18,8 @@
package org.apache.spark.sql.hive
import org.apache.hadoop.hive.serde.serdeConstants
+import org.apache.spark.sql.catalyst.expressions.JsonTuple
+import org.apache.spark.sql.catalyst.plans.logical.Generate
import org.scalatest.BeforeAndAfterAll
import org.apache.spark.SparkFunSuite
@@ -183,4 +185,15 @@ class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll {
assertError("select interval '.1111111111' second",
"nanosecond 1111111111 outside range")
}
+
+ test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") {
+ val plan = HiveQl.parseSql(
+ """
+ |SELECT *
+ |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test
+ |LATERAL VIEW json_tuple(json, 'f1', 'f2') jt AS a, b
+ """.stripMargin)
+
+ assert(plan.children.head.asInstanceOf[Generate].generator.isInstanceOf[JsonTuple])
+ }
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 9a425d7f6b..3427152b2d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1448,4 +1448,35 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
Row("1", "10") :: Row("2", "20") :: Row("3", "30") :: Row("4", "40") :: Nil)
}
}
+
+ test("SPARK-11590: use native json_tuple in lateral view") {
+ checkAnswer(sql(
+ """
+ |SELECT a, b
+ |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test
+ |LATERAL VIEW json_tuple(json, 'f1', 'f2') jt AS a, b
+ """.stripMargin), Row("value1", "12"))
+
+ // we should use `c0`, `c1`... as the name of fields if no alias is provided, to follow hive.
+ checkAnswer(sql(
+ """
+ |SELECT c0, c1
+ |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test
+ |LATERAL VIEW json_tuple(json, 'f1', 'f2') jt
+ """.stripMargin), Row("value1", "12"))
+
+ // we can also use `json_tuple` in project list.
+ checkAnswer(sql(
+ """
+ |SELECT json_tuple(json, 'f1', 'f2')
+ |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test
+ """.stripMargin), Row("value1", "12"))
+
+ // we can also mix `json_tuple` with other project expressions.
+ checkAnswer(sql(
+ """
+ |SELECT json_tuple(json, 'f1', 'f2'), 3.14, str
+ |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
+ """.stripMargin), Row("value1", "12", 3.14, "hello"))
+ }
}