diff options
author | Liang-Chi Hsieh <viirya@appier.com> | 2015-10-12 09:16:14 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2015-10-12 09:16:14 -0700 |
commit | fcb37a04177edc2376e39dd0b910f0268f7c72ec (patch) | |
tree | 5996f49bd8b368e963f6f91f840a04d5d4fd3b54 | |
parent | 595012ea8b9c6afcc2fc024d5a5e198df765bd75 (diff) | |
download | spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.tar.gz spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.tar.bz2 spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.zip |
[SPARK-10960] [SQL] SQL with windowing function should be able to refer column in inner select
JIRA: https://issues.apache.org/jira/browse/SPARK-10960
When accessing a column in inner select from a select with window function, `AnalysisException` will be thrown. For example, an query like this:
select area, rank() over (partition by area order by tmp.month) + tmp.tmp1 as c1 from (select month, area, product, 1 as tmp1 from windowData) tmp
Currently, the rule `ExtractWindowExpressions` in `Analyzer` only extracts regular expressions from `WindowFunction`, `WindowSpecDefinition` and `AggregateExpression`. We need to also extract other attributes as the one in `Alias` as shown in the above query.
Author: Liang-Chi Hsieh <viirya@appier.com>
Closes #9011 from viirya/fix-window-inner-column.
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 | ||||
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 27 |
2 files changed, 31 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index bf72d47ce1..f5597a08d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -831,6 +831,10 @@ class Analyzer( val withName = Alias(agg, s"_w${extractedExprBuffer.length}")() extractedExprBuffer += withName withName.toAttribute + + // Extracts other attributes + case attr: Attribute => extractExpr(attr) + }.asInstanceOf[NamedExpression] } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index ccc15eaa63..51b63f3688 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -838,6 +838,33 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { ).map(i => Row(i._1, i._2, i._3))) } + test("window function: refer column in inner select block") { + val data = Seq( + WindowData(1, "a", 5), + WindowData(2, "a", 6), + WindowData(3, "b", 7), + WindowData(4, "b", 8), + WindowData(5, "c", 9), + WindowData(6, "c", 10) + ) + sparkContext.parallelize(data).toDF().registerTempTable("windowData") + + checkAnswer( + sql( + """ + |select area, rank() over (partition by area order by tmp.month) + tmp.tmp1 as c1 + |from (select month, area, product, 1 as tmp1 from windowData) tmp + """.stripMargin), + Seq( + ("a", 2), + ("a", 3), + ("b", 2), + ("b", 3), + ("c", 2), + ("c", 3) + ).map(i => Row(i._1, i._2))) + } + test("window function: partition and order expressions") { val data = Seq( WindowData(1, "a", 5), |