diff options
author | gatorsmile <gatorsmile@gmail.com> | 2016-07-07 12:07:19 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-07-07 12:07:19 +0800 |
commit | 42279bff686f9808ec7a9e8f4da95c717edc6026 (patch) | |
tree | f80d581c70a7442163756e9e8eab56560c4c63c9 /sql/hive/src/main | |
parent | 34283de160808324da02964cd5dc5df80e59ae71 (diff) | |
download | spark-42279bff686f9808ec7a9e8f4da95c717edc6026.tar.gz spark-42279bff686f9808ec7a9e8f4da95c717edc6026.tar.bz2 spark-42279bff686f9808ec7a9e8f4da95c717edc6026.zip |
[SPARK-16374][SQL] Remove Alias from MetastoreRelation and SimpleCatalogRelation
#### What changes were proposed in this pull request?
Unlike the other leaf nodes, `MetastoreRelation` and `SimpleCatalogRelation` have a pre-defined `alias`, which is used to change the qualifier of the node. However, under the existing alias handling, the alias should instead be carried by a `SubqueryAlias` node.
This PR separates alias handling from `MetastoreRelation` and `SimpleCatalogRelation`, making them consistent with the other nodes and simplifying both the constructor signature and the conversion to a `BaseRelation`.
For example, below is an example query for `MetastoreRelation`, which is converted to a `LogicalRelation`:
```SQL
SELECT tmp.a + 1 FROM test_parquet_ctas tmp WHERE tmp.a > 2
```
Before the changes, the analyzed plan is
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
+- SubqueryAlias tmp
+- Relation[a#951] parquet
```
After the changes, the analyzed plan becomes
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
+- SubqueryAlias tmp
+- SubqueryAlias test_parquet_ctas
+- Relation[a#951] parquet
```
**Note: the optimized plans are the same.**
For `SimpleCatalogRelation`, the existing code already wraps the relation in two `SubqueryAlias` nodes. Thus, no change is needed there.
#### How was this patch tested?
Added test cases.
Author: gatorsmile <gatorsmile@gmail.com>
Closes #14053 from gatorsmile/removeAliasFromMetastoreRelation.
Diffstat (limited to 'sql/hive/src/main')
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 10 | ||||
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala | 10 |
2 files changed, 10 insertions, 10 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 20e64a4e09..2be51ed0e8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -180,8 +180,10 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log SubqueryAlias(aliasText, sessionState.sqlParser.parsePlan(viewText)) } } else { - MetastoreRelation( - qualifiedTableName.database, qualifiedTableName.name, alias)(table, client, sparkSession) + val qualifiedTable = + MetastoreRelation( + qualifiedTableName.database, qualifiedTableName.name)(table, client, sparkSession) + alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable) } } @@ -385,7 +387,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log // Read path case relation: MetastoreRelation if shouldConvertMetastoreParquet(relation) => val parquetRelation = convertToParquetRelation(relation) - SubqueryAlias(relation.alias.getOrElse(relation.tableName), parquetRelation) + SubqueryAlias(relation.tableName, parquetRelation) } } } @@ -423,7 +425,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log // Read path case relation: MetastoreRelation if shouldConvertMetastoreOrc(relation) => val orcRelation = convertToOrcRelation(relation) - SubqueryAlias(relation.alias.getOrElse(relation.tableName), orcRelation) + SubqueryAlias(relation.tableName, orcRelation) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala index 58bca2059c..3ab1bdabb9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala @@ -41,8 
+41,7 @@ import org.apache.spark.sql.hive.client.HiveClient private[hive] case class MetastoreRelation( databaseName: String, - tableName: String, - alias: Option[String]) + tableName: String) (val catalogTable: CatalogTable, @transient private val client: HiveClient, @transient private val sparkSession: SparkSession) @@ -52,13 +51,12 @@ private[hive] case class MetastoreRelation( case relation: MetastoreRelation => databaseName == relation.databaseName && tableName == relation.tableName && - alias == relation.alias && output == relation.output case _ => false } override def hashCode(): Int = { - Objects.hashCode(databaseName, tableName, alias, output) + Objects.hashCode(databaseName, tableName, output) } override protected def otherCopyArgs: Seq[AnyRef] = catalogTable :: sparkSession :: Nil @@ -208,7 +206,7 @@ private[hive] case class MetastoreRelation( CatalystSqlParser.parseDataType(f.dataType), // Since data can be dumped in randomly with no validation, everything is nullable. nullable = true - )(qualifier = Some(alias.getOrElse(tableName))) + )(qualifier = Some(tableName)) } /** PartitionKey attributes */ @@ -243,6 +241,6 @@ private[hive] case class MetastoreRelation( } override def newInstance(): MetastoreRelation = { - MetastoreRelation(databaseName, tableName, alias)(catalogTable, client, sparkSession) + MetastoreRelation(databaseName, tableName)(catalogTable, client, sparkSession) } } |