aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: gatorsmile <gatorsmile@gmail.com> 2016-02-11 11:08:21 +0800
committer: Cheng Lian <lian@databricks.com> 2016-02-11 11:08:21 +0800
commit: 0f09f0226983cdc409ef504dff48395787dc844f (patch)
tree: b8e8f9949ec5c87f1f94986ddc682e1a0b27c8be
parent: 663cc400f3b927633e47df07eea409da0e9ae70e (diff)
download: spark-0f09f0226983cdc409ef504dff48395787dc844f.tar.gz
spark-0f09f0226983cdc409ef504dff48395787dc844f.tar.bz2
spark-0f09f0226983cdc409ef504dff48395787dc844f.zip
[SPARK-13205][SQL] SQL Generation Support for Self Join
This PR addresses two issues:
- Self join does not work in SQL generation.
- When creating new instances of `LogicalRelation`, `metastoreTableIdentifier` is lost.

liancheng, could you please review the code changes? Thank you!

Author: gatorsmile <gatorsmile@gmail.com>

Closes #11084 from gatorsmile/selfJoinInSQLGen.
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala | 6
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala | 10
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala | 8
3 files changed, 22 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
index fa97f3d719..0e0748ff32 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
@@ -76,7 +76,11 @@ case class LogicalRelation(
/** Used to lookup original attribute capitalization */
val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o)))
- def newInstance(): this.type = LogicalRelation(relation).asInstanceOf[this.type]
+ def newInstance(): this.type =
+ LogicalRelation(
+ relation,
+ expectedOutputAttributes,
+ metastoreTableIdentifier).asInstanceOf[this.type]
override def simpleString: String = s"Relation[${output.mkString(",")}] $relation"
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index fc5725d691..4b75e60f8d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -142,7 +142,15 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
Some(s"`$database`.`$table`")
case Subquery(alias, child) =>
- toSQL(child).map(childSQL => s"($childSQL) AS $alias")
+ toSQL(child).map( childSQL =>
+ child match {
+ // Parentheses are not used for persisted data source relations
+ // e.g., select x.c1 from (t1) as x inner join (t1) as y on x.c1 = y.c1
+ case Subquery(_, _: LogicalRelation | _: MetastoreRelation) =>
+ s"$childSQL AS $alias"
+ case _ =>
+ s"($childSQL) AS $alias"
+ })
case Join(left, right, joinType, condition) =>
for {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index 129bfe0a7d..80ae312d91 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -104,6 +104,14 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0")
}
+ test("self join") {
+ checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key")
+ }
+
+ test("self join with group by") {
+ checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key")
+ }
+
test("three-child union") {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
}