author    Dilip Biswal <dbiswal@us.ibm.com>    2015-10-21 11:10:32 -0700
committer Yin Huai <yhuai@databricks.com>      2015-10-21 11:10:32 -0700
commit    49ea0e9d7ce805d312d94a5b2936eec2053bc052 (patch)
tree      5870f5037f7c7500c4727c50318877e381ef7405 /sql/catalyst
parent    ccf536f903ef1f81fb3e1b6ce781d5e40d0ae3e0 (diff)
[SPARK-10534] [SQL] ORDER BY clause allows only columns that are present in the select projection list
Find the missing attributes by recursively looking at the sort order expressions; the rest of the code takes care of projecting them out.

Added description from cloud-fan:

I want to explain a bit more about this bug. When we resolve the sort ordering, we use a special method that only resolves UnresolvedAttribute and UnresolvedExtractValue. However, for something like Floor('a), even when 'a is resolved, the Floor expression may still be unresolved because of a data type mismatch (for example, 'a is string type while Floor needs double type), so it cannot pass this filter and we cannot push down the missing attribute 'a.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #9123 from dilipbiswal/SPARK-10534.
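To make the one-character change in Analyzer.scala concrete, here is a minimal sketch of the difference between filtering before and after collecting attribute references. It uses Catalyst expression classes directly; the attribute name and its type are assumptions for illustration and are not taken from the patch.

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.StringType

// 'a itself is resolved, but Floor('a) is not: Floor expects a double input and
// the implicit cast has not been inserted yet, so the whole SortOrder is unresolved.
val a = AttributeReference("a", StringType)()
val ordering = Seq(SortOrder(Floor(a), Ascending))

// Old code: filter the ordering *expressions* on resolved first. The unresolved
// Floor(a) is dropped entirely, so the reference to 'a is never collected.
val oldRequired = AttributeSet(ordering.filter(_.resolved))  // empty

// New code: collect attribute references first, then keep the resolved ones,
// so 'a survives and can be added to (and later pruned from) the projection.
val newRequired = AttributeSet(ordering).filter(_.resolved)  // contains 'a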
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala      |  2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 10
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index e6046055bf..9237f2f3dd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -482,7 +482,7 @@ class Analyzer(
val newOrdering = resolveSortOrders(ordering, grandchild, throws = true)
// Construct a set that contains all of the attributes that we need to evaluate the
// ordering.
- val requiredAttributes = AttributeSet(newOrdering.filter(_.resolved))
+ val requiredAttributes = AttributeSet(newOrdering).filter(_.resolved)
// Figure out which ones are missing from the projection, so that we can add them and
// remove them after the sort.
val missingInProject = requiredAttributes -- child.output
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index ec05cfa63c..24af8483a7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -143,4 +143,14 @@ class AnalysisSuite extends AnalysisTest {
plan = testRelation.select(CreateStructUnsafe(Seq(a, (a + 1).as("a+1"))).as("col"))
checkAnalysis(plan, plan)
}
+
+ test("SPARK-10534: resolve attribute references in order by clause") {
+ val a = testRelation2.output(0)
+ val c = testRelation2.output(2)
+
+ val plan = testRelation2.select('c).orderBy(Floor('a).asc)
+ val expected = testRelation2.select(c, a).orderBy(Floor(a.cast(DoubleType)).asc).select(c)
+
+ checkAnalysis(plan, expected)
+ }
}
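For reference, a hypothetical end-to-end repro of the user-facing symptom, assuming a Spark 1.5-era spark-shell with its predefined sqlContext; the table and column names are made up for illustration. Before this patch the analyzer could not resolve a plan of this shape because 'a was never pushed down into the projection; with the patch the query analyzes and runs.

// Column "a" holds numeric strings, so floor(a) needs an implicit cast to double,
// and "a" does not appear in the SELECT list, which is exactly the shape this fix targets.
val df = sqlContext.createDataFrame(Seq(("1", "x"), ("2", "y"))).toDF("a", "c")
df.registerTempTable("t")
sqlContext.sql("SELECT c FROM t ORDER BY floor(a)").show()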