diff options
author | gatorsmile <gatorsmile@gmail.com> | 2016-02-11 11:08:21 +0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2016-02-11 11:08:21 +0800 |
commit | 0f09f0226983cdc409ef504dff48395787dc844f (patch) | |
tree | b8e8f9949ec5c87f1f94986ddc682e1a0b27c8be | |
parent | 663cc400f3b927633e47df07eea409da0e9ae70e (diff) | |
download | spark-0f09f0226983cdc409ef504dff48395787dc844f.tar.gz spark-0f09f0226983cdc409ef504dff48395787dc844f.tar.bz2 spark-0f09f0226983cdc409ef504dff48395787dc844f.zip |
[SPARK-13205][SQL] SQL Generation Support for Self Join
This PR addresses two issues:
- SQL generation does not work for self joins.
- `LogicalRelation.newInstance()` loses `metastoreTableIdentifier` when it creates a new instance.
liancheng, could you please review the code changes? Thank you!
Author: gatorsmile <gatorsmile@gmail.com>
Closes #11084 from gatorsmile/selfJoinInSQLGen.
3 files changed, 22 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index fa97f3d719..0e0748ff32 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -76,7 +76,11 @@ case class LogicalRelation( /** Used to lookup original attribute capitalization */ val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o))) - def newInstance(): this.type = LogicalRelation(relation).asInstanceOf[this.type] + def newInstance(): this.type = + LogicalRelation( + relation, + expectedOutputAttributes, + metastoreTableIdentifier).asInstanceOf[this.type] override def simpleString: String = s"Relation[${output.mkString(",")}] $relation" } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index fc5725d691..4b75e60f8d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -142,7 +142,15 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi Some(s"`$database`.`$table`") case Subquery(alias, child) => - toSQL(child).map(childSQL => s"($childSQL) AS $alias") + toSQL(child).map( childSQL => + child match { + // Parentheses is not used for persisted data source relations + // e.g., select x.c1 from (t1) as x inner join (t1) as y on x.c1 = y.c1 + case Subquery(_, _: LogicalRelation | _: MetastoreRelation) => + s"$childSQL AS $alias" + case _ => + s"($childSQL) AS $alias" + }) case Join(left, right, joinType, condition) => for { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index 129bfe0a7d..80ae312d91 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -104,6 +104,14 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0") } + test("self join") { + checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key") + } + + test("self join with group by") { + checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key") + } + test("three-child union") { checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0") } |