about | summary | refs | log | tree | commit | diff
path: root/sql/core
diff options
context:
space:
mode:
author: Michael Armbrust <michael@databricks.com> 2015-03-17 19:47:51 -0700
committer: Michael Armbrust <michael@databricks.com> 2015-03-17 19:47:51 -0700
commit: 3579003115fa3217cff6aa400729d96b0c7b257b (patch)
tree: bad3f848417dfe25c37dff200fb77d0083d13019 /sql/core
parent: a6ee2f7940b9a64a81667615586ae597da837974 (diff)
download: spark-3579003115fa3217cff6aa400729d96b0c7b257b.tar.gz
spark-3579003115fa3217cff6aa400729d96b0c7b257b.tar.bz2
spark-3579003115fa3217cff6aa400729d96b0c7b257b.zip
[SPARK-6247][SQL] Fix resolution of ambiguous joins caused by new aliases
We need to handle ambiguous `exprId`s that are produced by new aliases as well as those caused by leaf nodes (`MultiInstanceRelation`).

Attempting to fix this revealed a bug in `equals` for `Alias` as these objects were comparing equal even when the expression ids did not match. Additionally, `LocalRelation` did not correctly provide statistics, and some tests in `catalyst` and `hive` were not using the helper functions for comparing plans.

Based on #4991 by chenghao-intel

Author: Michael Armbrust <michael@databricks.com>

Closes #5062 from marmbrus/selfJoins and squashes the following commits:

8e9b84b [Michael Armbrust] check qualifier too
8038a36 [Michael Armbrust] handle aggs too
0b9c687 [Michael Armbrust] fix more tests
c3c574b [Michael Armbrust] revert change.
725f1ab [Michael Armbrust] add statistics
a925d08 [Michael Armbrust] check for conflicting attributes in join resolution
b022ef7 [Michael Armbrust] Handle project aliases.
d8caa40 [Michael Armbrust] test case: SPARK-6247
f9c67c2 [Michael Armbrust] Check for duplicate attributes in join resolution.
898af73 [Michael Armbrust] Fix Alias equality.
Diffstat (limited to 'sql/core')
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala | 6
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 31
2 files changed, 36 insertions, 1 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 3036fbc05d..a53ae97d62 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql
+import org.apache.spark.sql.catalyst.expressions.NamedExpression
+import org.apache.spark.sql.catalyst.plans.logical.{Project, NoRelation}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.test.TestSQLContext.implicits._
@@ -311,7 +313,9 @@ class ColumnExpressionSuite extends QueryTest {
}
test("lift alias out of cast") {
- assert(col("1234").as("name").cast("int").expr === col("1234").cast("int").as("name").expr)
+ compareExpressions(
+ col("1234").as("name").cast("int").expr,
+ col("1234").cast("int").as("name").expr)
}
test("columns can be compared") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 4dedcd365f..a3c0076e16 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -36,6 +36,37 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
import org.apache.spark.sql.test.TestSQLContext.implicits._
val sqlCtx = TestSQLContext
+ test("self join with aliases") {
+ Seq(1,2,3).map(i => (i, i.toString)).toDF("int", "str").registerTempTable("df")
+
+ checkAnswer(
+ sql(
+ """
+ |SELECT x.str, COUNT(*)
+ |FROM df x JOIN df y ON x.str = y.str
+ |GROUP BY x.str
+ """.stripMargin),
+ Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
+ }
+
+ test("self join with alias in agg") {
+ Seq(1,2,3)
+ .map(i => (i, i.toString))
+ .toDF("int", "str")
+ .groupBy("str")
+ .agg($"str", count("str").as("strCount"))
+ .registerTempTable("df")
+
+ checkAnswer(
+ sql(
+ """
+ |SELECT x.str, SUM(x.strCount)
+ |FROM df x JOIN df y ON x.str = y.str
+ |GROUP BY x.str
+ """.stripMargin),
+ Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
+ }
+
test("SPARK-4625 support SORT BY in SimpleSQLParser & DSL") {
checkAnswer(
sql("SELECT a FROM testData2 SORT BY a"),