diff options
author | gatorsmile <gatorsmile@gmail.com> | 2016-07-07 12:07:19 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-07-07 12:07:19 +0800 |
commit | 42279bff686f9808ec7a9e8f4da95c717edc6026 (patch) | |
tree | f80d581c70a7442163756e9e8eab56560c4c63c9 /sql | |
parent | 34283de160808324da02964cd5dc5df80e59ae71 (diff) | |
download | spark-42279bff686f9808ec7a9e8f4da95c717edc6026.tar.gz spark-42279bff686f9808ec7a9e8f4da95c717edc6026.tar.bz2 spark-42279bff686f9808ec7a9e8f4da95c717edc6026.zip |
[SPARK-16374][SQL] Remove Alias from MetastoreRelation and SimpleCatalogRelation
#### What changes were proposed in this pull request?
Unlike the other leaf nodes, `MetastoreRelation` and `SimpleCatalogRelation` have a pre-defined `alias`, which is used to change the qualifier of the node. However, based on the existing alias handling, the alias should instead be carried by a `SubqueryAlias` node.
This PR separates alias handling out of `MetastoreRelation` and `SimpleCatalogRelation` to make them consistent with the other leaf nodes. It also simplifies their signatures and the conversion to a `BaseRelation`.
For example, below is an example query for `MetastoreRelation`, which is converted to a `LogicalRelation`:
```SQL
SELECT tmp.a + 1 FROM test_parquet_ctas tmp WHERE tmp.a > 2
```
Before the changes, the analyzed plan is
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
+- SubqueryAlias tmp
+- Relation[a#951] parquet
```
After the changes, the analyzed plan becomes
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
+- SubqueryAlias tmp
+- SubqueryAlias test_parquet_ctas
+- Relation[a#951] parquet
```
**Note: the optimized plans are the same.**
For `SimpleCatalogRelation`, the existing code always generates two Subqueries. Thus, no change is needed.
#### How was this patch tested?
Added test cases.
Author: gatorsmile <gatorsmile@gmail.com>
Closes #14053 from gatorsmile/removeAliasFromMetastoreRelation.
Diffstat (limited to 'sql')
5 files changed, 14 insertions, 15 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index e1d49912c3..ffaefeb09a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -403,7 +403,7 @@ class SessionCatalog( val relation = if (name.database.isDefined || !tempTables.contains(table)) { val metadata = externalCatalog.getTable(db, table) - SimpleCatalogRelation(db, metadata, alias) + SimpleCatalogRelation(db, metadata) } else { tempTables(table) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 6197acab33..b12606e17d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -244,8 +244,7 @@ trait CatalogRelation { */ case class SimpleCatalogRelation( databaseName: String, - metadata: CatalogTable, - alias: Option[String] = None) + metadata: CatalogTable) extends LeafNode with CatalogRelation { override def catalogTable: CatalogTable = metadata @@ -261,7 +260,7 @@ case class SimpleCatalogRelation( CatalystSqlParser.parseDataType(f.dataType), // Since data can be dumped in randomly with no validation, everything is nullable. 
nullable = true - )(qualifier = Some(alias.getOrElse(metadata.identifier.table))) + )(qualifier = Some(metadata.identifier.table)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index c8e7c5103b..05eb302c3c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -407,7 +407,7 @@ class SessionCatalogSuite extends SparkFunSuite { val relationWithAlias = SubqueryAlias(alias, SubqueryAlias("tbl1", - SimpleCatalogRelation("db2", tableMetadata, Some(alias)))) + SimpleCatalogRelation("db2", tableMetadata))) assert(catalog.lookupRelation( TableIdentifier("tbl1", Some("db2")), alias = None) == relation) assert(catalog.lookupRelation( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 20e64a4e09..2be51ed0e8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -180,8 +180,10 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log SubqueryAlias(aliasText, sessionState.sqlParser.parsePlan(viewText)) } } else { - MetastoreRelation( - qualifiedTableName.database, qualifiedTableName.name, alias)(table, client, sparkSession) + val qualifiedTable = + MetastoreRelation( + qualifiedTableName.database, qualifiedTableName.name)(table, client, sparkSession) + alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable) } } @@ -385,7 +387,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log // Read path case relation: MetastoreRelation if 
shouldConvertMetastoreParquet(relation) => val parquetRelation = convertToParquetRelation(relation) - SubqueryAlias(relation.alias.getOrElse(relation.tableName), parquetRelation) + SubqueryAlias(relation.tableName, parquetRelation) } } } @@ -423,7 +425,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log // Read path case relation: MetastoreRelation if shouldConvertMetastoreOrc(relation) => val orcRelation = convertToOrcRelation(relation) - SubqueryAlias(relation.alias.getOrElse(relation.tableName), orcRelation) + SubqueryAlias(relation.tableName, orcRelation) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala index 58bca2059c..3ab1bdabb9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala @@ -41,8 +41,7 @@ import org.apache.spark.sql.hive.client.HiveClient private[hive] case class MetastoreRelation( databaseName: String, - tableName: String, - alias: Option[String]) + tableName: String) (val catalogTable: CatalogTable, @transient private val client: HiveClient, @transient private val sparkSession: SparkSession) @@ -52,13 +51,12 @@ private[hive] case class MetastoreRelation( case relation: MetastoreRelation => databaseName == relation.databaseName && tableName == relation.tableName && - alias == relation.alias && output == relation.output case _ => false } override def hashCode(): Int = { - Objects.hashCode(databaseName, tableName, alias, output) + Objects.hashCode(databaseName, tableName, output) } override protected def otherCopyArgs: Seq[AnyRef] = catalogTable :: sparkSession :: Nil @@ -208,7 +206,7 @@ private[hive] case class MetastoreRelation( CatalystSqlParser.parseDataType(f.dataType), // Since data can be dumped in randomly with no validation, everything is nullable. 
nullable = true - )(qualifier = Some(alias.getOrElse(tableName))) + )(qualifier = Some(tableName)) } /** PartitionKey attributes */ @@ -243,6 +241,6 @@ private[hive] case class MetastoreRelation( } override def newInstance(): MetastoreRelation = { - MetastoreRelation(databaseName, tableName, alias)(catalogTable, client, sparkSession) + MetastoreRelation(databaseName, tableName)(catalogTable, client, sparkSession) } } |