aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <viirya@gmail.com>2015-03-17 18:58:52 -0700
committerMichael Armbrust <michael@databricks.com>2015-03-17 18:58:52 -0700
commit5c80643d137008ce8a0ac7467b31d8d52383c105 (patch)
tree5e1157909a2d3bd0a5d707b7d4fb6531c97a859d /sql/hive
parenta012e08635dc2d643715e11680fd6a3fb3afe44a (diff)
downloadspark-5c80643d137008ce8a0ac7467b31d8d52383c105.tar.gz
spark-5c80643d137008ce8a0ac7467b31d8d52383c105.tar.bz2
spark-5c80643d137008ce8a0ac7467b31d8d52383c105.zip
[SPARK-5908][SQL] Resolve UdtfsAlias when only single Alias is used
`ResolveUdtfsAlias` in `hiveUdfs` only handles a `HiveGenericUdtf` with multiple aliases. When only a single alias is used with a `HiveGenericUdtf`, the alias is not resolved. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #4692 from viirya/udft_alias and squashes the following commits: 8a3bae4 [Liang-Chi Hsieh] No need to test selected column from DataFrame since DataFrame API is updated. 160a379 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into udft_alias e6531cc [Liang-Chi Hsieh] Selected column from DataFrame should not re-analyze logical plan. a45cc2a [Liang-Chi Hsieh] Resolve UdtfsAlias when only single Alias is used.
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala2
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala7
2 files changed, 9 insertions, 0 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 34c21c1176..4a702d9656 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -333,6 +333,8 @@ private[spark] object ResolveUdtfsAlias extends Rule[LogicalPlan] {
if projectList.exists(_.isInstanceOf[MultiAlias]) && projectList.size != 1 =>
throw new TreeNodeException(p, "only single Generator supported for SELECT clause")
+ case Project(Seq(Alias(udtf @ HiveGenericUdtf(_, _, _), name)), child) =>
+ Generate(udtf.copy(aliasNames = Seq(name)), join = false, outer = false, None, child)
case Project(Seq(MultiAlias(udtf @ HiveGenericUdtf(_, _, _), names)), child) =>
Generate(udtf.copy(aliasNames = names), join = false, outer = false, None, child)
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 22ea19bd82..1187228f4c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -397,6 +397,13 @@ class SQLQuerySuite extends QueryTest {
dropTempTable("data")
}
+ test("resolve udtf with single alias") {
+ val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i+1}]}"""))
+ jsonRDD(rdd).registerTempTable("data")
+ val df = sql("SELECT explode(a) AS val FROM data")
+ val col = df("val")
+ }
+
test("logical.Project should not be resolved if it contains aggregates or generators") {
// This test is used to test the fix of SPARK-5875.
// The original issue was that Project's resolved will be true when it contains