[SPARK-13087][SQL] Fix group by function for sort based aggregation

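It is not valid to call `toAttribute` on a `NamedExpression` unless we know for sure that the child operator produced that `NamedExpression`. The previous code worked when the grouping expressions were simple attribute references, but when a grouping expression was a derived value (for example, the result of a function call such as `floor(a)`), the resulting attribute was not part of the child's output and the query failed at execution time. This change passes the grouping expressions to `SortBasedAggregate` directly instead of first converting them to attributes. Author: Michael Armbrust <michael@databricks.com> Closes #11013 from marmbrus/groupByFunction-master.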
author: Michael Armbrust <michael@databricks.com> 2016-02-02 16:48:59 +0800
committer: Yin Huai <yhuai@databricks.com> 2016-02-02 16:48:59 +0800
commit: 22ba21348b28d8b1909ccde6fe17fb9e68531e5a (patch)
tree: 5de6bca2fa387c50295bbf88a775194218827d3a
parent: b8666fd0e2a797924eb2e94ac5558aba2a9b5140 (diff)
download: spark-22ba21348b28d8b1909ccde6fe17fb9e68531e5a.tar.gz
spark-22ba21348b28d8b1909ccde6fe17fb9e68531e5a.tar.bz2
spark-22ba21348b28d8b1909ccde6fe17fb9e68531e5a.zip
2 files changed, 10 insertions, 3 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala
index 83379ae90f..1e113ccd4e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala
@@ -33,15 +33,14 @@ object Utils {
       resultExpressions: Seq[NamedExpression],
       child: SparkPlan): Seq[SparkPlan] = {
 
-    val groupingAttributes = groupingExpressions.map(_.toAttribute)
     val completeAggregateExpressions = aggregateExpressions.map(_.copy(mode = Complete))
     val completeAggregateAttributes = completeAggregateExpressions.map {
       expr => aggregateFunctionToAttribute(expr.aggregateFunction, expr.isDistinct)
     }
 
     SortBasedAggregate(
-      requiredChildDistributionExpressions = Some(groupingAttributes),
-      groupingExpressions = groupingAttributes,
+      requiredChildDistributionExpressions = Some(groupingExpressions),
+      groupingExpressions = groupingExpressions,
       aggregateExpressions = completeAggregateExpressions,
       aggregateAttributes = completeAggregateAttributes,
       initialInputBufferOffset = 0,
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index 3e4cf3f79e..7a9ed1eaf3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -193,6 +193,14 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
     sqlContext.dropTempTable("emptyTable")
   }
 
+  test("group by function") {
+    Seq((1, 2)).toDF("a", "b").registerTempTable("data")
+
+    checkAnswer(
+      sql("SELECT floor(a) AS a, collect_set(b) FROM data GROUP BY floor(a) ORDER BY a"),
+      Row(1, Array(2)) :: Nil)
+  }
+
   test("empty table") {
     // If there is no GROUP BY clause and the table is empty, we will generate a single row.
     checkAnswer(
author	Michael Armbrust <michael@databricks.com>	2016-02-02 16:48:59 +0800
committer	Yin Huai <yhuai@databricks.com>	2016-02-02 16:48:59 +0800
commit	22ba21348b28d8b1909ccde6fe17fb9e68531e5a (patch)
tree	5de6bca2fa387c50295bbf88a775194218827d3a
parent	b8666fd0e2a797924eb2e94ac5558aba2a9b5140 (diff)
download	spark-22ba21348b28d8b1909ccde6fe17fb9e68531e5a.tar.gz spark-22ba21348b28d8b1909ccde6fe17fb9e68531e5a.tar.bz2 spark-22ba21348b28d8b1909ccde6fe17fb9e68531e5a.zip