diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-03-18 10:16:48 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-03-18 10:16:48 +0800 |
commit | 6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb (patch) | |
tree | a5c32ee23bc0c2c0df236a4de03ea5f9f8948906 | |
parent | 453455c479308badeeef2ad000577800982b1d4f (diff) | |
download | spark-6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb.tar.gz spark-6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb.tar.bz2 spark-6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb.zip |
[SPARK-13976][SQL] do not remove sub-queries added by user when generate SQL
## What changes were proposed in this pull request?
We haven't figured out the corrected logical to add sub-queries yet, so we should not clear all sub-queries before generate SQL. This PR changed the logic to only remove sub-queries above table relation.
an example for this bug, original SQL: `SELECT a FROM (SELECT a FROM tbl) t WHERE a = 1`
before this PR, we will generate:
```
SELECT attr_1 AS a FROM
SELECT attr_1 FROM (
SELECT a AS attr_1 FROM tbl
) AS sub_q0
WHERE attr_1 = 1
```
We missed a sub-query and this SQL string is illegal.
After this PR, we will generate:
```
SELECT attr_1 AS a FROM (
SELECT attr_1 FROM (
SELECT a AS attr_1 FROM tbl
) AS sub_q0
WHERE attr_1 = 1
) AS t
```
TODO: for long term, we should find a way to add sub-queries correctly, so that arbitrary logical plans can be converted to SQL string.
## How was this patch tested?
`LogicalPlanToSQLSuite`
Author: Wenchen Fan <wenchen@databricks.com>
Closes #11786 from cloud-fan/bug-fix.
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala | 12 | ||||
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala | 4 |
2 files changed, 14 insertions, 2 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index 9fa8474226..da05905a89 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -390,8 +390,6 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi // `Aggregate`s. CollapseProject), Batch("Recover Scoping Info", Once, - // Remove all sub queries, as we will insert new ones when it's necessary. - EliminateSubqueryAliases, // A logical plan is allowed to have same-name outputs with different qualifiers(e.g. the // `Join` operator). However, this kind of plan can't be put under a sub query as we will // erase and assign a new qualifier to all outputs and make it impossible to distinguish @@ -400,6 +398,10 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi // qualifiers, as attributes have unique names now and we don't need qualifiers to resolve // ambiguity. NormalizedAttribute, + // Our analyzer will add one or more sub-queries above table relation, this rule removes + // these sub-queries so that next rule can combine adjacent table relation and sample to + // SQLTable. + RemoveSubqueriesAboveSQLTable, // Finds the table relations and wrap them with `SQLTable`s. If there are any `Sample` // operators on top of a table relation, merge the sample information into `SQLTable` of // that table relation, as we can only convert table sample to standard SQL string. @@ -418,6 +420,12 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi } } + object RemoveSubqueriesAboveSQLTable extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + case SubqueryAlias(_, t @ ExtractSQLTable(_)) => t + } + } + object ResolveSQLTable extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.transformDown { case Sample(lowerBound, upperBound, _, _, ExtractSQLTable(table)) => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index f3cb6f8511..f86eba6349 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -737,4 +737,8 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { |LIMIT 5 """.stripMargin) } + + test("filter after subquery") { + checkHiveQl("SELECT a FROM (SELECT key + 1 AS a FROM parquet_t1) t WHERE a > 5") + } } |