diff options
author | Herman van Hovell <hvanhovell@databricks.com> | 2016-08-16 23:09:53 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-08-16 23:09:53 -0700 |
commit | f7c9ff57c17a950cccdc26aadf8768c899a4d572 (patch) | |
tree | 6ff7233e26d192dac7d1af38708707e74d84401a /sql/catalyst/src/main/scala/org/apache | |
parent | 4a2c375be2bcd98cc7e00bea920fd6a0f68a4e14 (diff) | |
download | spark-f7c9ff57c17a950cccdc26aadf8768c899a4d572.tar.gz spark-f7c9ff57c17a950cccdc26aadf8768c899a4d572.tar.bz2 spark-f7c9ff57c17a950cccdc26aadf8768c899a4d572.zip |
[SPARK-17068][SQL] Make view-usage visible during analysis
## What changes were proposed in this pull request?
This PR adds a field to subquery alias in order to make the usage of views in a resolved `LogicalPlan` more visible (and more understandable).
For example, the following view and query:
```sql
create view constants as select 1 as id union all select 1 union all select 42
select * from constants;
```
...now yields the following analyzed plan:
```
Project [id#39]
+- SubqueryAlias c, `default`.`constants`
+- Project [gen_attr_0#36 AS id#39]
+- SubqueryAlias gen_subquery_0
+- Union
:- Union
: :- Project [1 AS gen_attr_0#36]
: : +- OneRowRelation$
: +- Project [1 AS gen_attr_1#37]
: +- OneRowRelation$
+- Project [42 AS gen_attr_2#38]
+- OneRowRelation$
```
## How was this patch tested?
Added tests for the two code paths in `SessionCatalogSuite` (sql/core) and `HiveMetastoreCatalogSuite` (sql/hive)
Author: Herman van Hovell <hvanhovell@databricks.com>
Closes #14657 from hvanhovell/SPARK-17068.
Diffstat (limited to 'sql/catalyst/src/main/scala/org/apache')
8 files changed, 38 insertions, 31 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a2a022c247..bd4c19181f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -138,7 +138,7 @@ class Analyzer( case u : UnresolvedRelation => val substituted = cteRelations.find(x => resolver(x._1, u.tableIdentifier.table)) .map(_._2).map { relation => - val withAlias = u.alias.map(SubqueryAlias(_, relation)) + val withAlias = u.alias.map(SubqueryAlias(_, relation, None)) withAlias.getOrElse(relation) } substituted.getOrElse(u) @@ -2057,7 +2057,7 @@ class Analyzer( */ object EliminateSubqueryAliases extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { - case SubqueryAlias(_, child) => child + case SubqueryAlias(_, child, _) => child } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 41b7e62d8c..e07e9194be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -141,8 +141,8 @@ trait CheckAnalysis extends PredicateHelper { // Skip projects and subquery aliases added by the Analyzer and the SQLBuilder. def cleanQuery(p: LogicalPlan): LogicalPlan = p match { - case SubqueryAlias(_, child) => cleanQuery(child) - case Project(_, child) => cleanQuery(child) + case s: SubqueryAlias => cleanQuery(s.child) + case p: Project => cleanQuery(p.child) case child => child } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 00c3db0aac..62d0da076b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -411,27 +411,29 @@ class SessionCatalog( } /** - * Return a [[LogicalPlan]] that represents the given table. + * Return a [[LogicalPlan]] that represents the given table or view. * - * If a database is specified in `name`, this will return the table from that database. - * If no database is specified, this will first attempt to return a temporary table with - * the same name, then, if that does not exist, return the table from the current database. + * If a database is specified in `name`, this will return the table/view from that database. + * If no database is specified, this will first attempt to return a temporary table/view with + * the same name, then, if that does not exist, return the table/view from the current database. + * + * If the relation is a view, the relation will be wrapped in a [[SubqueryAlias]] which will + * track the name of the view. */ def lookupRelation(name: TableIdentifier, alias: Option[String] = None): LogicalPlan = { synchronized { val db = formatDatabaseName(name.database.getOrElse(currentDb)) val table = formatTableName(name.table) - val relation = - if (name.database.isDefined || !tempTables.contains(table)) { - val metadata = externalCatalog.getTable(db, table) - SimpleCatalogRelation(db, metadata) - } else { - tempTables(table) + val relationAlias = alias.getOrElse(table) + if (name.database.isDefined || !tempTables.contains(table)) { + val metadata = externalCatalog.getTable(db, table) + val view = Option(metadata.tableType).collect { + case CatalogTableType.VIEW => name } - val qualifiedTable = SubqueryAlias(table, relation) - // If an alias was specified by the lookup, wrap the plan in a subquery so that - // attributes are properly qualified with this alias. - alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable) + SubqueryAlias(relationAlias, SimpleCatalogRelation(db, metadata), view) + } else { + SubqueryAlias(relationAlias, tempTables(table), Option(name)) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 5181dcc786..9f54d709a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -343,7 +343,7 @@ package object dsl { orderSpec: Seq[SortOrder]): LogicalPlan = Window(windowExpressions, partitionSpec, orderSpec, logicalPlan) - def subquery(alias: Symbol): LogicalPlan = SubqueryAlias(alias.name, logicalPlan) + def subquery(alias: Symbol): LogicalPlan = SubqueryAlias(alias.name, logicalPlan, None) def except(otherPlan: LogicalPlan): LogicalPlan = Except(logicalPlan, otherPlan) @@ -367,7 +367,7 @@ package object dsl { def as(alias: String): LogicalPlan = logicalPlan match { case UnresolvedRelation(tbl, _) => UnresolvedRelation(tbl, Option(alias)) - case plan => SubqueryAlias(alias, plan) + case plan => SubqueryAlias(alias, plan, None) } def repartition(num: Integer): LogicalPlan = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala index ac44f08897..ddbe937cba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala @@ -72,7 +72,7 @@ case class ScalarSubquery( override def dataType: DataType = query.schema.fields.head.dataType override def foldable: Boolean = false override def nullable: Boolean = true - override def plan: LogicalPlan = SubqueryAlias(toString, query) + override def plan: LogicalPlan = SubqueryAlias(toString, query, None) override def withNewPlan(plan: LogicalPlan): ScalarSubquery = copy(query = plan) override def toString: String = s"scalar-subquery#${exprId.id} $conditionString" } @@ -100,7 +100,7 @@ case class PredicateSubquery( override lazy val resolved = childrenResolved && query.resolved override lazy val references: AttributeSet = super.references -- query.outputSet override def nullable: Boolean = nullAware - override def plan: LogicalPlan = SubqueryAlias(toString, query) + override def plan: LogicalPlan = SubqueryAlias(toString, query, None) override def withNewPlan(plan: LogicalPlan): PredicateSubquery = copy(query = plan) override def semanticEquals(o: Expression): Boolean = o match { case p: PredicateSubquery => @@ -153,7 +153,7 @@ case class ListQuery(query: LogicalPlan, exprId: ExprId = NamedExpression.newExp override def dataType: DataType = ArrayType(NullType) override def nullable: Boolean = false override def withNewPlan(plan: LogicalPlan): ListQuery = copy(query = plan) - override def plan: LogicalPlan = SubqueryAlias(toString, query) + override def plan: LogicalPlan = SubqueryAlias(toString, query, None) override def toString: String = s"list#${exprId.id}" } @@ -174,6 +174,6 @@ case class Exists(query: LogicalPlan, exprId: ExprId = NamedExpression.newExprId override def children: Seq[Expression] = Seq.empty override def nullable: Boolean = false override def withNewPlan(plan: LogicalPlan): Exists = copy(query = plan) - override def plan: LogicalPlan = SubqueryAlias(toString, query) + override def plan: LogicalPlan = SubqueryAlias(toString, query, None) override def toString: String = s"exists#${exprId.id}" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index e34a478818..f97a78b411 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1862,7 +1862,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { // and Project operators, followed by an optional Filter, followed by an // Aggregate. Traverse the operators recursively. def evalPlan(lp : LogicalPlan) : Map[ExprId, Option[Any]] = lp match { - case SubqueryAlias(_, child) => evalPlan(child) + case SubqueryAlias(_, child, _) => evalPlan(child) case Filter(condition, child) => val bindings = evalPlan(child) if (bindings.isEmpty) bindings @@ -1920,7 +1920,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { topPart += p bottomPart = child - case s @ SubqueryAlias(_, child) => + case s @ SubqueryAlias(_, child, _) => topPart += s bottomPart = child @@ -1991,8 +1991,8 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { topPart.reverse.foreach { case Project(projList, _) => subqueryRoot = Project(projList ++ havingInputs, subqueryRoot) - case s @ SubqueryAlias(alias, _) => - subqueryRoot = SubqueryAlias(alias, subqueryRoot) + case s @ SubqueryAlias(alias, _, None) => + subqueryRoot = SubqueryAlias(alias, subqueryRoot, None) case op => sys.error(s"Unexpected operator $op in corelated subquery") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 09b650ce18..adf78396d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -107,7 +107,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * This is only used for Common Table Expressions. */ override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) { - SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith)) + SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith), None) } /** @@ -723,7 +723,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * Create an alias (SubqueryAlias) for a LogicalPlan. */ private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): LogicalPlan = { - SubqueryAlias(alias.getText, plan) + SubqueryAlias(alias.getText, plan, None) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 2917d8d2a9..af1736e607 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.plans.logical import scala.collection.mutable.ArrayBuffer +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -693,7 +694,11 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNo } } -case class SubqueryAlias(alias: String, child: LogicalPlan) extends UnaryNode { +case class SubqueryAlias( + alias: String, + child: LogicalPlan, + view: Option[TableIdentifier]) + extends UnaryNode { override def output: Seq[Attribute] = child.output.map(_.withQualifier(Some(alias))) } |