about summary refs log tree commit diff
path: root/sql/catalyst/src/main/scala/org/apache
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@databricks.com>2016-08-16 23:09:53 -0700
committerReynold Xin <rxin@databricks.com>2016-08-16 23:09:53 -0700
commitf7c9ff57c17a950cccdc26aadf8768c899a4d572 (patch)
tree6ff7233e26d192dac7d1af38708707e74d84401a /sql/catalyst/src/main/scala/org/apache
parent4a2c375be2bcd98cc7e00bea920fd6a0f68a4e14 (diff)
downloadspark-f7c9ff57c17a950cccdc26aadf8768c899a4d572.tar.gz
spark-f7c9ff57c17a950cccdc26aadf8768c899a4d572.tar.bz2
spark-f7c9ff57c17a950cccdc26aadf8768c899a4d572.zip
[SPARK-17068][SQL] Make view-usage visible during analysis
## What changes were proposed in this pull request?

This PR adds a field to subquery alias in order to make the usage of views in a resolved `LogicalPlan` more visible (and more understandable).

For example, the following view and query:
```sql
create view constants as select 1 as id union all select 1 union all select 42;
select * from constants;
```
...now yields the following analyzed plan:
```
Project [id#39]
+- SubqueryAlias c, `default`.`constants`
   +- Project [gen_attr_0#36 AS id#39]
      +- SubqueryAlias gen_subquery_0
         +- Union
            :- Union
            :  :- Project [1 AS gen_attr_0#36]
            :  :  +- OneRowRelation$
            :  +- Project [1 AS gen_attr_1#37]
            :     +- OneRowRelation$
            +- Project [42 AS gen_attr_2#38]
               +- OneRowRelation$
```

## How was this patch tested?

Added tests for the two code paths in `SessionCatalogSuite` (sql/core) and `HiveMetastoreCatalogSuite` (sql/hive)

Author: Herman van Hovell <hvanhovell@databricks.com>

Closes #14657 from hvanhovell/SPARK-17068.
Diffstat (limited to 'sql/catalyst/src/main/scala/org/apache')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala30
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala8
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala8
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala7
8 files changed, 38 insertions, 31 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index a2a022c247..bd4c19181f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -138,7 +138,7 @@ class Analyzer(
case u : UnresolvedRelation =>
val substituted = cteRelations.find(x => resolver(x._1, u.tableIdentifier.table))
.map(_._2).map { relation =>
- val withAlias = u.alias.map(SubqueryAlias(_, relation))
+ val withAlias = u.alias.map(SubqueryAlias(_, relation, None))
withAlias.getOrElse(relation)
}
substituted.getOrElse(u)
@@ -2057,7 +2057,7 @@ class Analyzer(
*/
object EliminateSubqueryAliases extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
- case SubqueryAlias(_, child) => child
+ case SubqueryAlias(_, child, _) => child
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 41b7e62d8c..e07e9194be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -141,8 +141,8 @@ trait CheckAnalysis extends PredicateHelper {
// Skip projects and subquery aliases added by the Analyzer and the SQLBuilder.
def cleanQuery(p: LogicalPlan): LogicalPlan = p match {
- case SubqueryAlias(_, child) => cleanQuery(child)
- case Project(_, child) => cleanQuery(child)
+ case s: SubqueryAlias => cleanQuery(s.child)
+ case p: Project => cleanQuery(p.child)
case child => child
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 00c3db0aac..62d0da076b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -411,27 +411,29 @@ class SessionCatalog(
}
/**
- * Return a [[LogicalPlan]] that represents the given table.
+ * Return a [[LogicalPlan]] that represents the given table or view.
*
- * If a database is specified in `name`, this will return the table from that database.
- * If no database is specified, this will first attempt to return a temporary table with
- * the same name, then, if that does not exist, return the table from the current database.
+ * If a database is specified in `name`, this will return the table/view from that database.
+ * If no database is specified, this will first attempt to return a temporary table/view with
+ * the same name, then, if that does not exist, return the table/view from the current database.
+ *
+ * If the relation is a view, the relation will be wrapped in a [[SubqueryAlias]] which will
+ * track the name of the view.
*/
def lookupRelation(name: TableIdentifier, alias: Option[String] = None): LogicalPlan = {
synchronized {
val db = formatDatabaseName(name.database.getOrElse(currentDb))
val table = formatTableName(name.table)
- val relation =
- if (name.database.isDefined || !tempTables.contains(table)) {
- val metadata = externalCatalog.getTable(db, table)
- SimpleCatalogRelation(db, metadata)
- } else {
- tempTables(table)
+ val relationAlias = alias.getOrElse(table)
+ if (name.database.isDefined || !tempTables.contains(table)) {
+ val metadata = externalCatalog.getTable(db, table)
+ val view = Option(metadata.tableType).collect {
+ case CatalogTableType.VIEW => name
}
- val qualifiedTable = SubqueryAlias(table, relation)
- // If an alias was specified by the lookup, wrap the plan in a subquery so that
- // attributes are properly qualified with this alias.
- alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable)
+ SubqueryAlias(relationAlias, SimpleCatalogRelation(db, metadata), view)
+ } else {
+ SubqueryAlias(relationAlias, tempTables(table), Option(name))
+ }
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 5181dcc786..9f54d709a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -343,7 +343,7 @@ package object dsl {
orderSpec: Seq[SortOrder]): LogicalPlan =
Window(windowExpressions, partitionSpec, orderSpec, logicalPlan)
- def subquery(alias: Symbol): LogicalPlan = SubqueryAlias(alias.name, logicalPlan)
+ def subquery(alias: Symbol): LogicalPlan = SubqueryAlias(alias.name, logicalPlan, None)
def except(otherPlan: LogicalPlan): LogicalPlan = Except(logicalPlan, otherPlan)
@@ -367,7 +367,7 @@ package object dsl {
def as(alias: String): LogicalPlan = logicalPlan match {
case UnresolvedRelation(tbl, _) => UnresolvedRelation(tbl, Option(alias))
- case plan => SubqueryAlias(alias, plan)
+ case plan => SubqueryAlias(alias, plan, None)
}
def repartition(num: Integer): LogicalPlan =
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
index ac44f08897..ddbe937cba 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
@@ -72,7 +72,7 @@ case class ScalarSubquery(
override def dataType: DataType = query.schema.fields.head.dataType
override def foldable: Boolean = false
override def nullable: Boolean = true
- override def plan: LogicalPlan = SubqueryAlias(toString, query)
+ override def plan: LogicalPlan = SubqueryAlias(toString, query, None)
override def withNewPlan(plan: LogicalPlan): ScalarSubquery = copy(query = plan)
override def toString: String = s"scalar-subquery#${exprId.id} $conditionString"
}
@@ -100,7 +100,7 @@ case class PredicateSubquery(
override lazy val resolved = childrenResolved && query.resolved
override lazy val references: AttributeSet = super.references -- query.outputSet
override def nullable: Boolean = nullAware
- override def plan: LogicalPlan = SubqueryAlias(toString, query)
+ override def plan: LogicalPlan = SubqueryAlias(toString, query, None)
override def withNewPlan(plan: LogicalPlan): PredicateSubquery = copy(query = plan)
override def semanticEquals(o: Expression): Boolean = o match {
case p: PredicateSubquery =>
@@ -153,7 +153,7 @@ case class ListQuery(query: LogicalPlan, exprId: ExprId = NamedExpression.newExp
override def dataType: DataType = ArrayType(NullType)
override def nullable: Boolean = false
override def withNewPlan(plan: LogicalPlan): ListQuery = copy(query = plan)
- override def plan: LogicalPlan = SubqueryAlias(toString, query)
+ override def plan: LogicalPlan = SubqueryAlias(toString, query, None)
override def toString: String = s"list#${exprId.id}"
}
@@ -174,6 +174,6 @@ case class Exists(query: LogicalPlan, exprId: ExprId = NamedExpression.newExprId
override def children: Seq[Expression] = Seq.empty
override def nullable: Boolean = false
override def withNewPlan(plan: LogicalPlan): Exists = copy(query = plan)
- override def plan: LogicalPlan = SubqueryAlias(toString, query)
+ override def plan: LogicalPlan = SubqueryAlias(toString, query, None)
override def toString: String = s"exists#${exprId.id}"
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index e34a478818..f97a78b411 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1862,7 +1862,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] {
// and Project operators, followed by an optional Filter, followed by an
// Aggregate. Traverse the operators recursively.
def evalPlan(lp : LogicalPlan) : Map[ExprId, Option[Any]] = lp match {
- case SubqueryAlias(_, child) => evalPlan(child)
+ case SubqueryAlias(_, child, _) => evalPlan(child)
case Filter(condition, child) =>
val bindings = evalPlan(child)
if (bindings.isEmpty) bindings
@@ -1920,7 +1920,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] {
topPart += p
bottomPart = child
- case s @ SubqueryAlias(_, child) =>
+ case s @ SubqueryAlias(_, child, _) =>
topPart += s
bottomPart = child
@@ -1991,8 +1991,8 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] {
topPart.reverse.foreach {
case Project(projList, _) =>
subqueryRoot = Project(projList ++ havingInputs, subqueryRoot)
- case s @ SubqueryAlias(alias, _) =>
- subqueryRoot = SubqueryAlias(alias, subqueryRoot)
+ case s @ SubqueryAlias(alias, _, None) =>
+ subqueryRoot = SubqueryAlias(alias, subqueryRoot, None)
case op => sys.error(s"Unexpected operator $op in corelated subquery")
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 09b650ce18..adf78396d7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -107,7 +107,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* This is only used for Common Table Expressions.
*/
override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) {
- SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith))
+ SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith), None)
}
/**
@@ -723,7 +723,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
* Create an alias (SubqueryAlias) for a LogicalPlan.
*/
private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): LogicalPlan = {
- SubqueryAlias(alias.getText, plan)
+ SubqueryAlias(alias.getText, plan, None)
}
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 2917d8d2a9..af1736e607 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.plans.logical
import scala.collection.mutable.ArrayBuffer
+import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
@@ -693,7 +694,11 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNo
}
}
-case class SubqueryAlias(alias: String, child: LogicalPlan) extends UnaryNode {
+case class SubqueryAlias(
+ alias: String,
+ child: LogicalPlan,
+ view: Option[TableIdentifier])
+ extends UnaryNode {
override def output: Seq[Attribute] = child.output.map(_.withQualifier(Some(alias)))
}