aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWenchen Fan <cloud0fan@outlook.com>2015-03-05 14:49:01 -0800
committerMichael Armbrust <michael@databricks.com>2015-03-05 14:49:01 -0800
commit5873c713cc47af311f517ea33a6110993a410377 (patch)
tree2cae21463159fad0b0b8cbe2dcaab3c6e2f7887f
parent424a86a1ed2a3e6dd54cf8b09fe2f13a1311b7e6 (diff)
downloadspark-5873c713cc47af311f517ea33a6110993a410377.tar.gz
spark-5873c713cc47af311f517ea33a6110993a410377.tar.bz2
spark-5873c713cc47af311f517ea33a6110993a410377.zip
[SPARK-6145][SQL] fix ORDER BY on nested fields
Based on #4904 with style errors fixed. `LogicalPlan#resolve` will produce not only an `Attribute`, but also a "`GetField` chain". So in `ResolveSortReferences`, after resolving the ordering expressions, we should not just collect the `Attribute` results, but also the `Attribute`s at the bottom of each "`GetField` chain". Author: Wenchen Fan <cloud0fan@outlook.com> Author: Michael Armbrust <michael@databricks.com> Closes #4918 from marmbrus/pr/4904 and squashes the following commits: 997f84e [Michael Armbrust] fix style 3eedbfc [Wenchen Fan] fix 6145
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala5
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala10
3 files changed, 14 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index c363a5efac..54ab13ca35 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -385,7 +385,7 @@ class SqlParser extends AbstractSparkSQLParser {
protected lazy val dotExpressionHeader: Parser[Expression] =
(ident <~ ".") ~ ident ~ rep("." ~> ident) ^^ {
- case i1 ~ i2 ~ rest => UnresolvedAttribute(i1 + "." + i2 + rest.mkString(".", ".", ""))
+ case i1 ~ i2 ~ rest => UnresolvedAttribute((Seq(i1, i2) ++ rest).mkString("."))
}
protected lazy val dataType: Parser[DataType] =
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index e4e542562f..7753331748 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -310,7 +310,7 @@ class Analyzer(catalog: Catalog,
}
/**
- * In many dialects of SQL is it valid to sort by attributes that are not present in the SELECT
+ * In many dialects of SQL it is valid to sort by attributes that are not present in the SELECT
* clause. This rule detects such queries and adds the required attributes to the original
* projection, so that they will be available during sorting. Another projection is added to
* remove these attributes after sorting.
@@ -321,7 +321,8 @@ class Analyzer(catalog: Catalog,
if !s.resolved && p.resolved =>
val unresolved = ordering.flatMap(_.collect { case UnresolvedAttribute(name) => name })
val resolved = unresolved.flatMap(child.resolve(_, resolver))
- val requiredAttributes = AttributeSet(resolved.collect { case a: Attribute => a })
+ val requiredAttributes =
+ AttributeSet(resolved.flatMap(_.collect { case a: Attribute => a }))
val missingInProject = requiredAttributes -- p.output
if (missingInProject.nonEmpty) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 097bf0dd23..4dedcd365f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1049,4 +1049,14 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
rdd.toDF().registerTempTable("distinctData")
checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), Row(2))
}
+
+ test("SPARK-6145: ORDER BY test for nested fields") {
+ jsonRDD(sparkContext.makeRDD(
+ """{"a": {"b": 1, "a": {"a": 1}}, "c": [{"d": 1}]}""" :: Nil)).registerTempTable("nestedOrder")
+ // These should be successfully analyzed
+ sql("SELECT 1 FROM nestedOrder ORDER BY a.b").queryExecution.analyzed
+ sql("SELECT a.b FROM nestedOrder ORDER BY a.b").queryExecution.analyzed
+ sql("SELECT 1 FROM nestedOrder ORDER BY a.a.a").queryExecution.analyzed
+ sql("SELECT 1 FROM nestedOrder ORDER BY c[0].d").queryExecution.analyzed
+ }
}