diff options
author | Liang-Chi Hsieh <viirya@gmail.com> | 2015-03-17 18:58:52 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-03-17 18:58:52 -0700 |
commit | 5c80643d137008ce8a0ac7467b31d8d52383c105 (patch) | |
tree | 5e1157909a2d3bd0a5d707b7d4fb6531c97a859d | |
parent | a012e08635dc2d643715e11680fd6a3fb3afe44a (diff) | |
download | spark-5c80643d137008ce8a0ac7467b31d8d52383c105.tar.gz spark-5c80643d137008ce8a0ac7467b31d8d52383c105.tar.bz2 spark-5c80643d137008ce8a0ac7467b31d8d52383c105.zip |
[SPARK-5908][SQL] Resolve UdtfsAlias when only single Alias is used
`ResolveUdtfsAlias` in `hiveUdfs` only handles a `HiveGenericUdtf` with multiple aliases (`MultiAlias`). When only a single alias (`Alias`) is used with a `HiveGenericUdtf`, the alias does not work.
Author: Liang-Chi Hsieh <viirya@gmail.com>
Closes #4692 from viirya/udft_alias and squashes the following commits:
8a3bae4 [Liang-Chi Hsieh] No need to test selected column from DataFrame since DataFrame API is updated.
160a379 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into udft_alias
e6531cc [Liang-Chi Hsieh] Selected column from DataFrame should not re-analyze logical plan.
a45cc2a [Liang-Chi Hsieh] Resolve UdtfsAlias when only single Alias is used.
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 2 | ||||
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 7 |
2 files changed, 9 insertions, 0 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 34c21c1176..4a702d9656 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -333,6 +333,8 @@ private[spark] object ResolveUdtfsAlias extends Rule[LogicalPlan] { if projectList.exists(_.isInstanceOf[MultiAlias]) && projectList.size != 1 => throw new TreeNodeException(p, "only single Generator supported for SELECT clause") + case Project(Seq(Alias(udtf @ HiveGenericUdtf(_, _, _), name)), child) => + Generate(udtf.copy(aliasNames = Seq(name)), join = false, outer = false, None, child) case Project(Seq(MultiAlias(udtf @ HiveGenericUdtf(_, _, _), names)), child) => Generate(udtf.copy(aliasNames = names), join = false, outer = false, None, child) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 22ea19bd82..1187228f4c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -397,6 +397,13 @@ class SQLQuerySuite extends QueryTest { dropTempTable("data") } + test("resolve udtf with single alias") { + val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i+1}]}""")) + jsonRDD(rdd).registerTempTable("data") + val df = sql("SELECT explode(a) AS val FROM data") + val col = df("val") + } + test("logical.Project should not be resolved if it contains aggregates or generators") { // This test is used to test the fix of SPARK-5875. // The original issue was that Project's resolved will be true when it contains |