author    Dilip Biswal <dbiswal@us.ibm.com>    2015-10-21 11:10:32 -0700
committer Yin Huai <yhuai@databricks.com>      2015-10-21 11:10:32 -0700
commit    49ea0e9d7ce805d312d94a5b2936eec2053bc052 (patch)
tree      5870f5037f7c7500c4727c50318877e381ef7405 /sql/catalyst
parent    ccf536f903ef1f81fb3e1b6ce781d5e40d0ae3e0 (diff)
[SPARK-10534] [SQL] ORDER BY clause allows only columns that are present in the select projection list
Find the missing attributes by recursively looking at the sort order expressions; the rest of the code takes care of projecting them out.

Added description from cloud-fan:

I want to explain a bit more about this bug. When we resolve the sort ordering, we use a special method that only resolves UnresolvedAttribute and UnresolvedExtractValue. However, for something like Floor('a), even when 'a is resolved, the Floor expression may still be unresolved because of a data type mismatch (for example, 'a is string type while Floor needs double type), so it cannot pass this filter and we cannot push down the missing attribute 'a.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #9123 from dilipbiswal/SPARK-10534.
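To make the one-character change in Analyzer.scala concrete, here is a minimal sketch of the difference between filtering before and after collecting attribute references. It uses Catalyst expression classes directly; the attribute name and its type are assumptions for illustration and are not taken from the patch.

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.StringType

// 'a itself is resolved, but Floor('a) is not: Floor expects a double input and
// the implicit cast has not been inserted yet, so the whole SortOrder is unresolved.
val a = AttributeReference("a", StringType)()
val ordering = Seq(SortOrder(Floor(a), Ascending))

// Old code: filter the ordering *expressions* on resolved first. The unresolved
// Floor(a) is dropped entirely, so the reference to 'a is never collected.
val oldRequired = AttributeSet(ordering.filter(_.resolved))  // empty

// New code: collect attribute references first, then keep the resolved ones,
// so 'a survives and can be added to (and later pruned from) the projection.
val newRequired = AttributeSet(ordering).filter(_.resolved)  // contains 'a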
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala      |  2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 10
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index e6046055bf..9237f2f3dd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -482,7 +482,7 @@ class Analyzer(
val newOrdering = resolveSortOrders(ordering, grandchild, throws = true)
// Construct a set that contains all of the attributes that we need to evaluate the
// ordering.
- val requiredAttributes = AttributeSet(newOrdering.filter(_.resolved))
+ val requiredAttributes = AttributeSet(newOrdering).filter(_.resolved)
// Figure out which ones are missing from the projection, so that we can add them and
// remove them after the sort.
val missingInProject = requiredAttributes -- child.output
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index ec05cfa63c..24af8483a7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -143,4 +143,14 @@ class AnalysisSuite extends AnalysisTest {
plan = testRelation.select(CreateStructUnsafe(Seq(a, (a + 1).as("a+1"))).as("col"))
checkAnalysis(plan, plan)
}
+
+ test("SPARK-10534: resolve attribute references in order by clause") {
+ val a = testRelation2.output(0)
+ val c = testRelation2.output(2)
+
+ val plan = testRelation2.select('c).orderBy(Floor('a).asc)
+ val expected = testRelation2.select(c, a).orderBy(Floor(a.cast(DoubleType)).asc).select(c)
+
+ checkAnalysis(plan, expected)
+ }
}
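For reference, a hypothetical end-to-end repro of the user-facing symptom, assuming a Spark 1.5-era spark-shell with its predefined sqlContext; the table and column names are made up for illustration. Before this patch the analyzer could not resolve a plan of this shape because 'a was never pushed down into the projection; with the patch the query analyzes and runs.

// Column "a" holds numeric strings, so floor(a) needs an implicit cast to double,
// and "a" does not appear in the SELECT list, which is exactly the shape this fix targets.
val df = sqlContext.createDataFrame(Seq(("1", "x"), ("2", "y"))).toDF("a", "c")
df.registerTempTable("t")
sqlContext.sql("SELECT c FROM t ORDER BY floor(a)").show()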