aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-02-23 15:16:59 +0800
committerCheng Lian <lian@databricks.com>2016-02-23 15:16:59 +0800
commit01e10c9fef51c69ecf3060929c62d113cfc672b9 (patch)
tree303af42f70b2119909466d3f17f34187640ac240 /sql
parent9dd5399d78d74a8ba2326db25704ba7cb7aa353d (diff)
downloadspark-01e10c9fef51c69ecf3060929c62d113cfc672b9.tar.gz
spark-01e10c9fef51c69ecf3060929c62d113cfc672b9.tar.bz2
spark-01e10c9fef51c69ecf3060929c62d113cfc672b9.zip
[SPARK-13236] SQL Generation for Set Operations
This PR is to implement SQL generation for the following three set operations: - Union Distinct - Intersect - Except liancheng Thanks! Author: gatorsmile <gatorsmile@gmail.com> Author: xiaoli <lixiao1983@gmail.com> Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local> Closes #11195 from gatorsmile/setOpSQLGen.
Diffstat (limited to 'sql')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala14
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala24
2 files changed, 33 insertions, 5 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index 32f17f41c9..e66cc127ea 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation
* supported by this builder (yet).
*/
class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Logging {
- require(logicalPlan.resolved, "SQLBuilder only supports resloved logical query plans")
+ require(logicalPlan.resolved, "SQLBuilder only supports resolved logical query plans")
def this(df: DataFrame) = this(df.queryExecution.analyzed, df.sqlContext)
@@ -98,10 +98,20 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
}
build(toSQL(p.child), whereOrHaving, p.condition.sql)
+ case p @ Distinct(u: Union) if u.children.length > 1 =>
+ val childrenSql = u.children.map(c => s"(${toSQL(c)})")
+ childrenSql.mkString(" UNION DISTINCT ")
+
case p: Union if p.children.length > 1 =>
- val childrenSql = p.children.map(toSQL(_))
+ val childrenSql = p.children.map(c => s"(${toSQL(c)})")
childrenSql.mkString(" UNION ALL ")
+ case p: Intersect =>
+ build("(" + toSQL(p.left), ") INTERSECT (", toSQL(p.right) + ")")
+
+ case p: Except =>
+ build("(" + toSQL(p.left), ") EXCEPT (", toSQL(p.right) + ")")
+
case p: SubqueryAlias =>
p.child match {
// Persisted data source relation
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index b162adf215..28559eac8d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -114,6 +114,27 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0")
}
+ test("union distinct") {
+ checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0")
+ }
+
+ // Parser is unable to parse the following query:
+ // SELECT `u_1`.`id`
+ // FROM (((SELECT `t0`.`id` FROM `default`.`t0`)
+ // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`))
+ // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1
+ test("three-child union") {
+ checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
+ }
+
+ test("intersect") {
+ checkHiveQl("SELECT * FROM t0 INTERSECT SELECT * FROM t0")
+ }
+
+ test("except") {
+ checkHiveQl("SELECT * FROM t0 EXCEPT SELECT * FROM t0")
+ }
+
test("self join") {
checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key")
}
@@ -122,9 +143,6 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key")
}
- test("three-child union") {
- checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
- }
test("case") {
checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0")