From 65fe902e13153ad73a3026a66e73c93393df1abb Mon Sep 17 00:00:00 2001 From: windpiger Date: Sun, 19 Feb 2017 16:50:16 -0800 Subject: [SPARK-19598][SQL] Remove the alias parameter in UnresolvedRelation ## What changes were proposed in this pull request? Remove the alias parameter in `UnresolvedRelation`, and use `SubqueryAlias` to replace it. This can simplify some `match case` situations. For example, the broadcast hint pull request can have one fewer case https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala#L57-L61 ## How was this patch tested? add some unit tests Author: windpiger Closes #16956 from windpiger/removeUnresolveTableAlias. --- .../spark/sql/catalyst/analysis/Analyzer.scala | 10 ++----- .../spark/sql/catalyst/analysis/ResolveHints.scala | 6 ++-- .../spark/sql/catalyst/analysis/unresolved.scala | 5 +--- .../sql/catalyst/catalog/SessionCatalog.scala | 12 ++++---- .../apache/spark/sql/catalyst/dsl/package.scala | 10 ++----- .../spark/sql/catalyst/parser/AstBuilder.scala | 16 +++++++---- .../sql/catalyst/analysis/AnalysisSuite.scala | 32 ++++++++++++++-------- .../sql/catalyst/catalog/SessionCatalogSuite.scala | 22 --------------- .../spark/sql/execution/datasources/rules.scala | 3 +- .../scala/org/apache/spark/sql/JoinSuite.scala | 4 +-- .../execution/benchmark/TPCDSQueryBenchmark.scala | 4 +-- .../org/apache/spark/sql/hive/test/TestHive.scala | 2 +- 12 files changed, 51 insertions(+), 75 deletions(-) (limited to 'sql') diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index cd517a98ac..39a276284c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -180,12 +180,8 @@ class Analyzer( def substituteCTE(plan: LogicalPlan, cteRelations: Seq[(String, LogicalPlan)]): LogicalPlan = { plan transformDown { case u : UnresolvedRelation => - val substituted = cteRelations.find(x => resolver(x._1, u.tableIdentifier.table)) - .map(_._2).map { relation => - val withAlias = u.alias.map(SubqueryAlias(_, relation, None)) - withAlias.getOrElse(relation) - } - substituted.getOrElse(u) + cteRelations.find(x => resolver(x._1, u.tableIdentifier.table)) + .map(_._2).getOrElse(u) case other => // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. other transformExpressions { @@ -623,7 +619,7 @@ class Analyzer( val tableIdentWithDb = u.tableIdentifier.copy( database = u.tableIdentifier.database.orElse(defaultDatabase)) try { - catalog.lookupRelation(tableIdentWithDb, u.alias) + catalog.lookupRelation(tableIdentWithDb) } catch { case _: NoSuchTableException => u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index 2124177461..70438eb591 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -54,10 +54,8 @@ object ResolveHints { val newNode = CurrentOrigin.withOrigin(plan.origin) { plan match { - case r: UnresolvedRelation => - val alias = r.alias.getOrElse(r.tableIdentifier.table) - if (toBroadcast.exists(resolver(_, alias))) BroadcastHint(plan) else plan - + case u: UnresolvedRelation if toBroadcast.exists(resolver(_, u.tableIdentifier.table)) => + BroadcastHint(plan) case r: SubqueryAlias if toBroadcast.exists(resolver(_, r.alias)) => BroadcastHint(plan) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 36ed9ba503..262b894e2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -37,10 +37,7 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str /** * Holds the name of a relation that has yet to be looked up in a catalog. */ -case class UnresolvedRelation( - tableIdentifier: TableIdentifier, - alias: Option[String] = None) extends LeafNode { - +case class UnresolvedRelation(tableIdentifier: TableIdentifier) extends LeafNode { /** Returns a `.` separated name for this relation. */ def tableName: String = tableIdentifier.unquotedString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index dd0c5cb706..73ef0e6a18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -572,16 +572,14 @@ class SessionCatalog( * wrap the logical plan in a [[SubqueryAlias]] which will track the name of the view. * * @param name The name of the table/view that we look up. - * @param alias The alias name of the table/view that we look up. */ - def lookupRelation(name: TableIdentifier, alias: Option[String] = None): LogicalPlan = { + def lookupRelation(name: TableIdentifier): LogicalPlan = { synchronized { val db = formatDatabaseName(name.database.getOrElse(currentDb)) val table = formatTableName(name.table) - val relationAlias = alias.getOrElse(table) if (db == globalTempViewManager.database) { globalTempViewManager.get(table).map { viewDef => - SubqueryAlias(relationAlias, viewDef, None) + SubqueryAlias(table, viewDef, None) }.getOrElse(throw new NoSuchTableException(db, table)) } else if (name.database.isDefined || !tempTables.contains(table)) { val metadata = externalCatalog.getTable(db, table) @@ -594,12 +592,12 @@ class SessionCatalog( desc = metadata, output = metadata.schema.toAttributes, child = parser.parsePlan(viewText)) - SubqueryAlias(relationAlias, child, Some(name.copy(table = table, database = Some(db)))) + SubqueryAlias(table, child, Some(name.copy(table = table, database = Some(db)))) } else { - SubqueryAlias(relationAlias, SimpleCatalogRelation(metadata), None) + SubqueryAlias(table, SimpleCatalogRelation(metadata), None) } } else { - SubqueryAlias(relationAlias, tempTables(table), None) + SubqueryAlias(table, tempTables(table), None) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 66e52ca68a..3c53132339 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -280,11 +280,10 @@ package object dsl { object expressions extends ExpressionConversions // scalastyle:ignore object plans { // scalastyle:ignore - def table(ref: String): LogicalPlan = - UnresolvedRelation(TableIdentifier(ref), None) + def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref)) def table(db: String, ref: String): LogicalPlan = - UnresolvedRelation(TableIdentifier(ref, Option(db)), None) + UnresolvedRelation(TableIdentifier(ref, Option(db))) implicit class DslLogicalPlan(val logicalPlan: LogicalPlan) { def select(exprs: Expression*): LogicalPlan = { @@ -369,10 +368,7 @@ package object dsl { analysis.UnresolvedRelation(TableIdentifier(tableName)), Map.empty, logicalPlan, overwrite, false) - def as(alias: String): LogicalPlan = logicalPlan match { - case UnresolvedRelation(tbl, _) => UnresolvedRelation(tbl, Option(alias)) - case plan => SubqueryAlias(alias, plan, None) - } + def as(alias: String): LogicalPlan = SubqueryAlias(alias, logicalPlan, None) def repartition(num: Integer): LogicalPlan = Repartition(num, shuffle = true, logicalPlan) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index bbb9922c18..08a6dd136b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -179,7 +179,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { } InsertIntoTable( - UnresolvedRelation(tableIdent, None), + UnresolvedRelation(tableIdent), partitionKeys, query, ctx.OVERWRITE != null, @@ -645,17 +645,21 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * }}} */ override def visitTable(ctx: TableContext): LogicalPlan = withOrigin(ctx) { - UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier), None) + UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier)) } /** * Create an aliased table reference. This is typically used in FROM clauses. */ override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) { - val table = UnresolvedRelation( - visitTableIdentifier(ctx.tableIdentifier), - Option(ctx.strictIdentifier).map(_.getText)) - table.optionalMap(ctx.sample)(withSample) + val table = UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier)) + + val tableWithAlias = Option(ctx.strictIdentifier).map(_.getText) match { + case Some(strictIdentifier) => + SubqueryAlias(strictIdentifier, table, None) + case _ => table + } + tableWithAlias.optionalMap(ctx.sample)(withSample) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 81a97dc1ff..786e0f49b4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -61,23 +61,23 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers { checkAnalysis( Project(Seq(UnresolvedAttribute("TbL.a")), - UnresolvedRelation(TableIdentifier("TaBlE"), Some("TbL"))), + SubqueryAlias("TbL", UnresolvedRelation(TableIdentifier("TaBlE")), None)), Project(testRelation.output, testRelation)) assertAnalysisError( - Project(Seq(UnresolvedAttribute("tBl.a")), UnresolvedRelation( - TableIdentifier("TaBlE"), Some("TbL"))), + Project(Seq(UnresolvedAttribute("tBl.a")), + SubqueryAlias("TbL", UnresolvedRelation(TableIdentifier("TaBlE")), None)), Seq("cannot resolve")) checkAnalysis( - Project(Seq(UnresolvedAttribute("TbL.a")), UnresolvedRelation( - TableIdentifier("TaBlE"), Some("TbL"))), + Project(Seq(UnresolvedAttribute("TbL.a")), + SubqueryAlias("TbL", UnresolvedRelation(TableIdentifier("TaBlE")), None)), Project(testRelation.output, testRelation), caseSensitive = false) checkAnalysis( - Project(Seq(UnresolvedAttribute("tBl.a")), UnresolvedRelation( - TableIdentifier("TaBlE"), Some("TbL"))), + Project(Seq(UnresolvedAttribute("tBl.a")), + SubqueryAlias("TbL", UnresolvedRelation(TableIdentifier("TaBlE")), None)), Project(testRelation.output, testRelation), caseSensitive = false) } @@ -166,12 +166,12 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers { } test("resolve relations") { - assertAnalysisError(UnresolvedRelation(TableIdentifier("tAbLe"), None), Seq()) - checkAnalysis(UnresolvedRelation(TableIdentifier("TaBlE"), None), testRelation) + assertAnalysisError(UnresolvedRelation(TableIdentifier("tAbLe")), Seq()) + checkAnalysis(UnresolvedRelation(TableIdentifier("TaBlE")), testRelation) checkAnalysis( - UnresolvedRelation(TableIdentifier("tAbLe"), None), testRelation, caseSensitive = false) + UnresolvedRelation(TableIdentifier("tAbLe")), testRelation, caseSensitive = false) checkAnalysis( - UnresolvedRelation(TableIdentifier("TaBlE"), None), testRelation, caseSensitive = false) + UnresolvedRelation(TableIdentifier("TaBlE")), testRelation, caseSensitive = false) } test("divide should be casted into fractional types") { @@ -429,4 +429,14 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers { assertAnalysisSuccess(r1) assertAnalysisSuccess(r2) } + + test("resolve as with an already existed alias") { + checkAnalysis( + Project(Seq(UnresolvedAttribute("tbl2.a")), + SubqueryAlias("tbl", testRelation, None).as("tbl2")), + Project(testRelation.output, testRelation), + caseSensitive = false) + + checkAnalysis(SubqueryAlias("tbl", testRelation, None).as("tbl2"), testRelation) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index db73f03c8b..44434324d3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -444,28 +444,6 @@ class SessionCatalogSuite extends PlanTest { == SubqueryAlias("tbl1", SimpleCatalogRelation(metastoreTable1), None)) } - test("lookup table relation with alias") { - val catalog = new SessionCatalog(newBasicCatalog()) - val alias = "monster" - val tableMetadata = catalog.getTableMetadata(TableIdentifier("tbl1", Some("db2"))) - val relation = SubqueryAlias("tbl1", SimpleCatalogRelation(tableMetadata), None) - val relationWithAlias = - SubqueryAlias(alias, - SimpleCatalogRelation(tableMetadata), None) - assert(catalog.lookupRelation( - TableIdentifier("tbl1", Some("db2")), alias = None) == relation) - assert(catalog.lookupRelation( - TableIdentifier("tbl1", Some("db2")), alias = Some(alias)) == relationWithAlias) - } - - test("lookup view with view name in alias") { - val catalog = new SessionCatalog(newBasicCatalog()) - val tmpView = Range(1, 10, 2, 10) - catalog.createTempView("vw1", tmpView, overrideIfExists = false) - val plan = catalog.lookupRelation(TableIdentifier("vw1"), Option("range")) - assert(plan == SubqueryAlias("range", tmpView, None)) - } - test("look up view relation") { val externalCatalog = newBasicCatalog() val sessionCatalog = new SessionCatalog(externalCatalog) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 1c3e7c6d52..e7a59d4ad4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -52,8 +52,7 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { throw new AnalysisException("Unsupported data source type for direct query on files: " + s"${u.tableIdentifier.database.get}") } - val plan = LogicalRelation(dataSource.resolveRelation()) - u.alias.map(a => SubqueryAlias(a, plan, None)).getOrElse(plan) + LogicalRelation(dataSource.resolveRelation()) } catch { case _: ClassNotFoundException => u case e: Exception => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index f780fc0ec0..2e006735d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -364,8 +364,8 @@ class JoinSuite extends QueryTest with SharedSQLContext { upperCaseData.where('N <= 4).createOrReplaceTempView("`left`") upperCaseData.where('N >= 3).createOrReplaceTempView("`right`") - val left = UnresolvedRelation(TableIdentifier("left"), None) - val right = UnresolvedRelation(TableIdentifier("right"), None) + val left = UnresolvedRelation(TableIdentifier("left")) + val right = UnresolvedRelation(TableIdentifier("right")) checkAnswer( left.join(right, $"left.N" === $"right.N", "full"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 3988d9750b..239822b720 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -73,13 +73,13 @@ object TPCDSQueryBenchmark { // per-row processing time for those cases. val queryRelations = scala.collection.mutable.HashSet[String]() spark.sql(queryString).queryExecution.logical.map { - case ur @ UnresolvedRelation(t: TableIdentifier, _) => + case ur @ UnresolvedRelation(t: TableIdentifier) => queryRelations.add(t.table) case lp: LogicalPlan => lp.expressions.foreach { _ foreach { case subquery: SubqueryExpression => subquery.plan.foreach { - case ur @ UnresolvedRelation(t: TableIdentifier, _) => + case ur @ UnresolvedRelation(t: TableIdentifier) => queryRelations.add(t.table) case _ => } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 3267c237c8..fd13911947 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -483,7 +483,7 @@ private[hive] class TestHiveQueryExecution( // Make sure any test tables referenced are loaded. val referencedTables = describedTables ++ - logical.collect { case UnresolvedRelation(tableIdent, _) => tableIdent.table } + logical.collect { case UnresolvedRelation(tableIdent) => tableIdent.table } val referencedTestTables = referencedTables.filter(sparkSession.testTables.contains) logDebug(s"Query references test tables: ${referencedTestTables.mkString(", ")}") referencedTestTables.foreach(sparkSession.loadTestTable) -- cgit v1.2.3