aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2016-03-18 10:16:48 +0800
committerWenchen Fan <wenchen@databricks.com>2016-03-18 10:16:48 +0800
commit6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb (patch)
treea5c32ee23bc0c2c0df236a4de03ea5f9f8948906 /sql/hive
parent453455c479308badeeef2ad000577800982b1d4f (diff)
downloadspark-6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb.tar.gz
spark-6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb.tar.bz2
spark-6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb.zip
[SPARK-13976][SQL] do not remove sub-queries added by user when generate SQL
## What changes were proposed in this pull request? We haven't figured out the corrected logical to add sub-queries yet, so we should not clear all sub-queries before generate SQL. This PR changed the logic to only remove sub-queries above table relation. an example for this bug, original SQL: `SELECT a FROM (SELECT a FROM tbl) t WHERE a = 1` before this PR, we will generate: ``` SELECT attr_1 AS a FROM SELECT attr_1 FROM ( SELECT a AS attr_1 FROM tbl ) AS sub_q0 WHERE attr_1 = 1 ``` We missed a sub-query and this SQL string is illegal. After this PR, we will generate: ``` SELECT attr_1 AS a FROM ( SELECT attr_1 FROM ( SELECT a AS attr_1 FROM tbl ) AS sub_q0 WHERE attr_1 = 1 ) AS t ``` TODO: for long term, we should find a way to add sub-queries correctly, so that arbitrary logical plans can be converted to SQL string. ## How was this patch tested? `LogicalPlanToSQLSuite` Author: Wenchen Fan <wenchen@databricks.com> Closes #11786 from cloud-fan/bug-fix.
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala12
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala4
2 files changed, 14 insertions, 2 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index 9fa8474226..da05905a89 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -390,8 +390,6 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
// `Aggregate`s.
CollapseProject),
Batch("Recover Scoping Info", Once,
- // Remove all sub queries, as we will insert new ones when it's necessary.
- EliminateSubqueryAliases,
// A logical plan is allowed to have same-name outputs with different qualifiers(e.g. the
// `Join` operator). However, this kind of plan can't be put under a sub query as we will
// erase and assign a new qualifier to all outputs and make it impossible to distinguish
@@ -400,6 +398,10 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
// qualifiers, as attributes have unique names now and we don't need qualifiers to resolve
// ambiguity.
NormalizedAttribute,
+ // Our analyzer will add one or more sub-queries above table relation, this rule removes
+ // these sub-queries so that next rule can combine adjacent table relation and sample to
+ // SQLTable.
+ RemoveSubqueriesAboveSQLTable,
// Finds the table relations and wrap them with `SQLTable`s. If there are any `Sample`
// operators on top of a table relation, merge the sample information into `SQLTable` of
// that table relation, as we can only convert table sample to standard SQL string.
@@ -418,6 +420,12 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
}
}
+ object RemoveSubqueriesAboveSQLTable extends Rule[LogicalPlan] {
+ override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
+ case SubqueryAlias(_, t @ ExtractSQLTable(_)) => t
+ }
+ }
+
object ResolveSQLTable extends Rule[LogicalPlan] {
override def apply(plan: LogicalPlan): LogicalPlan = plan.transformDown {
case Sample(lowerBound, upperBound, _, _, ExtractSQLTable(table)) =>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index f3cb6f8511..f86eba6349 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -737,4 +737,8 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
|LIMIT 5
""".stripMargin)
}
+
+ test("filter after subquery") {
+ checkHiveQl("SELECT a FROM (SELECT key + 1 AS a FROM parquet_t1) t WHERE a > 5")
+ }
}