diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-03-18 19:42:33 +0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2016-03-18 19:42:33 +0800 |
commit | 0f1015ffdd40cd8647f6acdd5cdd717b883e4875 (patch) | |
tree | 60fc71f07a9a8ee02c404b07c8e46fe5d72850f5 /sql | |
parent | 7783b6f38ffb320050e1c826134187cd0f29ee9b (diff) | |
download | spark-0f1015ffdd40cd8647f6acdd5cdd717b883e4875.tar.gz spark-0f1015ffdd40cd8647f6acdd5cdd717b883e4875.tar.bz2 spark-0f1015ffdd40cd8647f6acdd5cdd717b883e4875.zip |
[SPARK-14001][SQL] support multi-children Union in SQLBuilder
## What changes were proposed in this pull request?
The fix is simple: use the existing `CombineUnions` rule to combine adjacent Unions before building the SQL string.
## How was this patch tested?
By re-enabling the previously ignored `three-child union` test in `LogicalPlanToSQLSuite`.
Author: Wenchen Fan <wenchen@databricks.com>
Closes #11818 from cloud-fan/bug-fix.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala | 14 | ||||
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala | 7 |
2 files changed, 11 insertions, 10 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index 249a685b9f..5175bd4609 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -24,9 +24,8 @@ import scala.util.control.NonFatal import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.optimizer.CollapseProject +import org.apache.spark.sql.catalyst.optimizer.{CollapseProject, CombineUnions} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} import org.apache.spark.sql.catalyst.util.quoteIdentifier @@ -384,11 +383,18 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi object Canonicalizer extends RuleExecutor[LogicalPlan] { override protected def batches: Seq[Batch] = Seq( - Batch("Collapse Project", FixedPoint(100), + Batch("Prepare", FixedPoint(100), // The `WidenSetOperationTypes` analysis rule may introduce extra `Project`s over // `Aggregate`s to perform type casting. This rule merges these `Project`s into // `Aggregate`s. - CollapseProject), + CollapseProject, + // Parser is unable to parse the following query: + // SELECT `u_1`.`id` + // FROM (((SELECT `t0`.`id` FROM `default`.`t0`) + // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) + // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1 + // This rule combine adjacent Unions together so we can generate flat UNION ALL SQL string. + CombineUnions), Batch("Recover Scoping Info", Once, // A logical plan is allowed to have same-name outputs with different qualifiers(e.g. the // `Join` operator). 
However, this kind of plan can't be put under a sub query as we will diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index f86eba6349..f6b9072da4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -141,12 +141,7 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkHiveQl("SELECT * FROM t0 UNION SELECT * FROM t0") } - // Parser is unable to parse the following query: - // SELECT `u_1`.`id` - // FROM (((SELECT `t0`.`id` FROM `default`.`t0`) - // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) - // UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1 - ignore("three-child union") { + test("three-child union") { checkHiveQl( """ |SELECT id FROM parquet_t0 |