aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-02-01 11:22:02 -0800
committerMichael Armbrust <michael@databricks.com>2016-02-01 11:22:02 -0800
commit33c8a490f7f64320c53530a57bd8d34916e3607c (patch)
treedba1fa3dc9b1b12af869425ac6dd49174ac64d37
parent6075573a93176ee8c071888e4525043d9e73b061 (diff)
downloadspark-33c8a490f7f64320c53530a57bd8d34916e3607c.tar.gz
spark-33c8a490f7f64320c53530a57bd8d34916e3607c.tar.bz2
spark-33c8a490f7f64320c53530a57bd8d34916e3607c.zip
[SPARK-12989][SQL] Delaying Alias Cleanup after ExtractWindowExpressions
JIRA: https://issues.apache.org/jira/browse/SPARK-12989 In the rule `ExtractWindowExpressions`, we simply replace alias by the corresponding attribute. However, this will cause an issue exposed by the following case: ```scala val data = Seq(("a", "b", "c", 3), ("c", "b", "a", 3)).toDF("A", "B", "C", "num") .withColumn("Data", struct("A", "B", "C")) .drop("A") .drop("B") .drop("C") val winSpec = Window.partitionBy("Data.A", "Data.B").orderBy($"num".desc) data.select($"*", max("num").over(winSpec) as "max").explain(true) ``` In this case, both `Data.A` and `Data.B` are `alias` in `WindowSpecDefinition`. If we replace these alias expression by their alias names, we are unable to know what they are since they will not be put in `missingExpr` too. Author: gatorsmile <gatorsmile@gmail.com> Author: xiaoli <lixiao1983@gmail.com> Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local> Closes #10963 from gatorsmile/seletStarAfterColDrop.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala5
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala10
2 files changed, 13 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 5fe700ee00..ee60fca1ad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -883,12 +883,13 @@ class Analyzer(
if (missingExpr.nonEmpty) {
extractedExprBuffer += ne
}
- ne.toAttribute
+ // alias will be cleaned in the rule CleanupAliases
+ ne
case e: Expression if e.foldable =>
e // No need to create an attribute reference if it will be evaluated as a Literal.
case e: Expression =>
// For other expressions, we extract it and replace it with an AttributeReference (with
- // an interal column name, e.g. "_w0").
+ // an internal column name, e.g. "_w0").
val withName = Alias(e, s"_w${extractedExprBuffer.length}")()
extractedExprBuffer += withName
withName.toAttribute
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
index d38842c3c0..2bcbb1983f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
@@ -344,4 +344,14 @@ class DataFrameWindowSuite extends QueryTest with SharedSQLContext {
Row("b", 1, null, null, null, null, null, null),
Row("b", 2, null, null, null, null, null, null)))
}
+
+ test("SPARK-12989 ExtractWindowExpressions treats alias as regular attribute") {
+ val src = Seq((0, 3, 5)).toDF("a", "b", "c")
+ .withColumn("Data", struct("a", "b"))
+ .drop("a")
+ .drop("b")
+ val winSpec = Window.partitionBy("Data.a", "Data.b").orderBy($"c".desc)
+ val df = src.select($"*", max("c").over(winSpec) as "max")
+ checkAnswer(df, Row(5, Row(0, 3), 5))
+ }
}