diff options
author | Liang-Chi Hsieh <simonh@tw.ibm.com> | 2016-09-21 06:53:42 -0700 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2016-09-21 06:53:42 -0700 |
commit | 248922fd4fb7c11a40304431e8cc667a8911a906 (patch) | |
tree | dbba079a41bc7a83adb4d583787c71d9252bdfd4 /sql/catalyst | |
parent | dd7561d33761d119ded09cfba072147292bf6964 (diff) | |
download | spark-248922fd4fb7c11a40304431e8cc667a8911a906.tar.gz spark-248922fd4fb7c11a40304431e8cc667a8911a906.tar.bz2 spark-248922fd4fb7c11a40304431e8cc667a8911a906.zip |
[SPARK-17590][SQL] Analyze CTE definitions at once and allow CTE subquery to define CTE
## What changes were proposed in this pull request?
The analyzer rule CTESubstitution substitutes CTE definitions into the logical plan. A CTE definition can be referenced multiple times in the logical plan, and its analyzed logical plan should be the same each time. We should therefore not analyze a CTE definition repeatedly when it is reused in the query.
By analyzing CTE definitions before substitution, we can also support defining a CTE inside a CTE subquery.
## How was this patch tested?
Jenkins tests.
Author: Liang-Chi Hsieh <simonh@tw.ibm.com>
Closes #15146 from viirya/cte-analysis-once.
Diffstat (limited to 'sql/catalyst')
3 files changed, 4 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 7023c0c8c4..de2f9ee6bc 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -262,7 +262,7 @@ ctes ; namedQuery - : name=identifier AS? '(' queryNoWith ')' + : name=identifier AS? '(' query ')' ; tableProvider diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index cc62d5e7c8..ae8869ff25 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -116,15 +116,14 @@ class Analyzer( ) /** - * Substitute child plan with cte definitions + * Analyze cte definitions and substitute child plan with analyzed cte definitions. 
*/ object CTESubstitution extends Rule[LogicalPlan] { - // TODO allow subquery to define CTE def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case With(child, relations) => substituteCTE(child, relations.foldLeft(Seq.empty[(String, LogicalPlan)]) { case (resolved, (name, relation)) => - resolved :+ name -> ResolveRelations(substituteCTE(relation, resolved)) + resolved :+ name -> execute(substituteCTE(relation, resolved)) }) case other => other } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 69d68fa6f9..12a70b7769 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -108,7 +108,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * This is only used for Common Table Expressions. */ override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) { - SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith), None) + SubqueryAlias(ctx.name.getText, plan(ctx.query), None) } /** |