diff options
author | Andrew Ray <ray.andrew@gmail.com> | 2016-09-15 21:45:29 +0200 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2016-09-15 21:45:29 +0200 |
commit | b72486f82dd9920135442191be5d384028e7fb41 (patch) | |
tree | 92a17fed6c2538036d26757b0d2507c9e4b7a061 | |
parent | 1202075c95eabba0ffebc170077df798f271a139 (diff) | |
download | spark-b72486f82dd9920135442191be5d384028e7fb41.tar.gz spark-b72486f82dd9920135442191be5d384028e7fb41.tar.bz2 spark-b72486f82dd9920135442191be5d384028e7fb41.zip |
[SPARK-17458][SQL] Alias specified for aggregates in a pivot are not honored
## What changes were proposed in this pull request?
This change preserves aliases that are given for pivot aggregations
## How was this patch tested?
New unit test
Author: Andrew Ray <ray.andrew@gmail.com>
Closes #15111 from aray/SPARK-17458.
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala | 11 |
2 files changed, 20 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 92bf8e0536..5210f42c55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -373,7 +373,15 @@ class Analyzer( case Pivot(groupByExprs, pivotColumn, pivotValues, aggregates, child) => val singleAgg = aggregates.size == 1 def outputName(value: Literal, aggregate: Expression): String = { - if (singleAgg) value.toString else value + "_" + aggregate.sql + if (singleAgg) { + value.toString + } else { + val suffix = aggregate match { + case n: NamedExpression => n.name + case _ => aggregate.sql + } + value + "_" + suffix + } } if (aggregates.forall(a => PivotFirst.supportsDataType(a.dataType))) { // Since evaluating |pivotValues| if statements for each input row can get slow this is an diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index d5cb5e1568..1bbe1354d5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -197,4 +197,15 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{ Row(2013, Seq(48000.0, 7.0), Seq(30000.0, 7.0)) :: Nil ) } + + test("pivot preserves aliases if given") { + assertResult( + Array("year", "dotNET_foo", "dotNET_avg(`earnings`)", "Java_foo", "Java_avg(`earnings`)") + )( + courseSales.groupBy($"year") + .pivot("course", Seq("dotNET", "Java")) + .agg(sum($"earnings").as("foo"), avg($"earnings")).columns + ) + } + } |