[SPARK-10960] [SQL] SQL with windowing function should be able to refer column in inner select

JIRA: https://issues.apache.org/jira/browse/SPARK-10960 When accessing a column in inner select from a select with window function, `AnalysisException` will be thrown. For example, an query like this: select area, rank() over (partition by area order by tmp.month) + tmp.tmp1 as c1 from (select month, area, product, 1 as tmp1 from windowData) tmp Currently, the rule `ExtractWindowExpressions` in `Analyzer` only extracts regular expressions from `WindowFunction`, `WindowSpecDefinition` and `AggregateExpression`. We need to also extract other attributes as the one in `Alias` as shown in the above query. Author: Liang-Chi Hsieh <viirya@appier.com> Closes #9011 from viirya/fix-window-inner-column.
author: Liang-Chi Hsieh <viirya@appier.com> 2015-10-12 09:16:14 -0700
committer: Yin Huai <yhuai@databricks.com> 2015-10-12 09:16:14 -0700
commit: fcb37a04177edc2376e39dd0b910f0268f7c72ec (patch)
tree: 5996f49bd8b368e963f6f91f840a04d5d4fd3b54 /sql
parent: 595012ea8b9c6afcc2fc024d5a5e198df765bd75 (diff)
download: spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.tar.gz
spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.tar.bz2
spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.zip
2 files changed, 31 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index bf72d47ce1..f5597a08d3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -831,6 +831,10 @@ class Analyzer(
             val withName = Alias(agg, s"_w${extractedExprBuffer.length}")()
             extractedExprBuffer += withName
             withName.toAttribute
+
+          // Extracts other attributes
+          case attr: Attribute => extractExpr(attr)
+
         }.asInstanceOf[NamedExpression]
       }
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index ccc15eaa63..51b63f3688 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -838,6 +838,33 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       ).map(i => Row(i._1, i._2, i._3)))
   }
 
+  test("window function: refer column in inner select block") {
+    val data = Seq(
+      WindowData(1, "a", 5),
+      WindowData(2, "a", 6),
+      WindowData(3, "b", 7),
+      WindowData(4, "b", 8),
+      WindowData(5, "c", 9),
+      WindowData(6, "c", 10)
+    )
+    sparkContext.parallelize(data).toDF().registerTempTable("windowData")
+
+    checkAnswer(
+      sql(
+        """
+          |select area, rank() over (partition by area order by tmp.month) + tmp.tmp1 as c1
+          |from (select month, area, product, 1 as tmp1 from windowData) tmp
+        """.stripMargin),
+      Seq(
+        ("a", 2),
+        ("a", 3),
+        ("b", 2),
+        ("b", 3),
+        ("c", 2),
+        ("c", 3)
+      ).map(i => Row(i._1, i._2)))
+  }
+
   test("window function: partition and order expressions") {
     val data = Seq(
       WindowData(1, "a", 5),
author	Liang-Chi Hsieh <viirya@appier.com>	2015-10-12 09:16:14 -0700
committer	Yin Huai <yhuai@databricks.com>	2015-10-12 09:16:14 -0700
commit	fcb37a04177edc2376e39dd0b910f0268f7c72ec (patch)
tree	5996f49bd8b368e963f6f91f840a04d5d4fd3b54 /sql
parent	595012ea8b9c6afcc2fc024d5a5e198df765bd75 (diff)
download	spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.tar.gz spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.tar.bz2 spark-fcb37a04177edc2376e39dd0b910f0268f7c72ec.zip