diff options
author    | Cheng Lian <lian@databricks.com> | 2016-03-21 11:00:09 -0700
committer | Yin Huai <yhuai@databricks.com> | 2016-03-21 11:00:09 -0700
commit    | 5d8de16e715497e9d3b5306abc5bbc45402e9b43 (patch)
tree      | ed1ac11aa6f46bdc10f219d9fb4b3d75eacf120a /sql/hive
parent    | 43ebf7a9cbd70d6af75e140a6fc91bf0ffc2b877 (diff)
download  | spark-5d8de16e715497e9d3b5306abc5bbc45402e9b43.tar.gz
          | spark-5d8de16e715497e9d3b5306abc5bbc45402e9b43.tar.bz2
          | spark-5d8de16e715497e9d3b5306abc5bbc45402e9b43.zip
[SPARK-14004][SQL] NamedExpressions should have at most one qualifier
## What changes were proposed in this pull request?
This is a more aggressive version of PR #11820, which not only fixes the original problem, but also does the following updates to enforce the at-most-one-qualifier constraint:
- Renames `NamedExpression.qualifiers` to `NamedExpression.qualifier`
- Uses `Option[String]` rather than `Seq[String]` for `NamedExpression.qualifier`
Quoted PR description of #11820 here:
> Current implementations of `AttributeReference.sql` and `Alias.sql` join all available qualifiers, which is logically wrong. This implementation mistake doesn't cause any real SQL generation bugs, though, since there is always at most one qualifier for any given `AttributeReference` or `Alias`.
## How was this patch tested?
Existing tests should be enough.
Author: Cheng Lian <lian@databricks.com>
Closes #11822 from liancheng/spark-14004-aggressive.
Diffstat (limited to 'sql/hive')
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala |  6
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala           | 12
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 69bccfba4a..27e4cfc103 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -421,10 +421,10 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
     if (table.properties.get("spark.sql.sources.provider").isDefined) {
       val dataSourceTable = cachedDataSourceTables(qualifiedTableName)
-      val tableWithQualifiers = SubqueryAlias(qualifiedTableName.name, dataSourceTable)
+      val qualifiedTable = SubqueryAlias(qualifiedTableName.name, dataSourceTable)
       // Then, if alias is specified, wrap the table with a Subquery using the alias.
       // Otherwise, wrap the table with a Subquery using the table name.
-      alias.map(a => SubqueryAlias(a, tableWithQualifiers)).getOrElse(tableWithQualifiers)
+      alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable)
     } else if (table.tableType == CatalogTableType.VIRTUAL_VIEW) {
       val viewText = table.viewText.getOrElse(sys.error("Invalid view without text."))
       alias match {
@@ -935,7 +935,7 @@ private[hive] case class MetastoreRelation(
       HiveMetastoreTypes.toDataType(f.dataType),
       // Since data can be dumped in randomly with no validation, everything is nullable.
       nullable = true
-    )(qualifiers = Seq(alias.getOrElse(tableName)))
+    )(qualifier = Some(alias.getOrElse(tableName)))
   }

   /** PartitionKey attributes */
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index b2196c4409..e54358e657 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -50,7 +50,7 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
   def toSQL: String = {
     val canonicalizedPlan = Canonicalizer.execute(logicalPlan)
     val outputNames = logicalPlan.output.map(_.name)
-    val qualifiers = logicalPlan.output.flatMap(_.qualifiers).distinct
+    val qualifiers = logicalPlan.output.flatMap(_.qualifier).distinct

     // Keep the qualifier information by using it as sub-query name, if there is only one qualifier
     // present.
@@ -63,7 +63,7 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
       // Canonicalizer will remove all naming information, we should add it back by adding an extra
       // Project and alias the outputs.
       val aliasedOutput = canonicalizedPlan.output.zip(outputNames).map {
-        case (attr, name) => Alias(attr.withQualifiers(Nil), name)()
+        case (attr, name) => Alias(attr.withQualifier(None), name)()
       }
       val finalPlan = Project(aliasedOutput, SubqueryAlias(finalName, canonicalizedPlan))
@@ -411,9 +411,9 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
   object NormalizedAttribute extends Rule[LogicalPlan] {
     override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions {
       case a: AttributeReference =>
-        AttributeReference(normalizedName(a), a.dataType)(exprId = a.exprId, qualifiers = Nil)
+        AttributeReference(normalizedName(a), a.dataType)(exprId = a.exprId, qualifier = None)
       case a: Alias =>
-        Alias(a.child, normalizedName(a))(exprId = a.exprId, qualifiers = Nil)
+        Alias(a.child, normalizedName(a))(exprId = a.exprId, qualifier = None)
     }
   }
@@ -513,10 +513,10 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
   object ExtractSQLTable {
     def unapply(plan: LogicalPlan): Option[SQLTable] = plan match {
       case l @ LogicalRelation(_, _, Some(TableIdentifier(table, Some(database)))) =>
-        Some(SQLTable(database, table, l.output.map(_.withQualifiers(Nil))))
+        Some(SQLTable(database, table, l.output.map(_.withQualifier(None))))
       case m: MetastoreRelation =>
-        Some(SQLTable(m.databaseName, m.tableName, m.output.map(_.withQualifiers(Nil))))
+        Some(SQLTable(m.databaseName, m.tableName, m.output.map(_.withQualifier(None))))
       case _ => None
     }