about summary refs log tree commit diff
path: root/sql/hive/src
diff options
context:
space:
mode:
author: gatorsmile <gatorsmile@gmail.com> 2016-07-07 12:07:19 +0800
committer: Wenchen Fan <wenchen@databricks.com> 2016-07-07 12:07:19 +0800
commit 42279bff686f9808ec7a9e8f4da95c717edc6026 (patch)
tree f80d581c70a7442163756e9e8eab56560c4c63c9 /sql/hive/src
parent 34283de160808324da02964cd5dc5df80e59ae71 (diff)
download spark-42279bff686f9808ec7a9e8f4da95c717edc6026.tar.gz
spark-42279bff686f9808ec7a9e8f4da95c717edc6026.tar.bz2
spark-42279bff686f9808ec7a9e8f4da95c717edc6026.zip
[SPARK-16374][SQL] Remove Alias from MetastoreRelation and SimpleCatalogRelation
#### What changes were proposed in this pull request? Different from the other leaf nodes, `MetastoreRelation` and `SimpleCatalogRelation` have a pre-defined `alias`, which is used to change the qualifier of the node. However, based on the existing alias handling, alias should be put in `SubqueryAlias`. This PR is to separate alias handling from `MetastoreRelation` and `SimpleCatalogRelation` to make it consistent with the other nodes. It simplifies the signature and conversion to a `BaseRelation`. For example, below is an example query for `MetastoreRelation`, which is converted to a `LogicalRelation`: ```SQL SELECT tmp.a + 1 FROM test_parquet_ctas tmp WHERE tmp.a > 2 ``` Before changes, the analyzed plan is ``` == Analyzed Logical Plan == (a + 1): int Project [(a#951 + 1) AS (a + 1)#952] +- Filter (a#951 > 2) +- SubqueryAlias tmp +- Relation[a#951] parquet ``` After changes, the analyzed plan becomes ``` == Analyzed Logical Plan == (a + 1): int Project [(a#951 + 1) AS (a + 1)#952] +- Filter (a#951 > 2) +- SubqueryAlias tmp +- SubqueryAlias test_parquet_ctas +- Relation[a#951] parquet ``` **Note: the optimized plans are the same.** For `SimpleCatalogRelation`, the existing code always generates two Subqueries. Thus, no change is needed. #### How was this patch tested? Added test cases. Author: gatorsmile <gatorsmile@gmail.com> Closes #14053 from gatorsmile/removeAliasFromMetastoreRelation.
Diffstat (limited to 'sql/hive/src')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 10
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala    | 10
2 files changed, 10 insertions, 10 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 20e64a4e09..2be51ed0e8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -180,8 +180,10 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
SubqueryAlias(aliasText, sessionState.sqlParser.parsePlan(viewText))
}
} else {
- MetastoreRelation(
- qualifiedTableName.database, qualifiedTableName.name, alias)(table, client, sparkSession)
+ val qualifiedTable =
+ MetastoreRelation(
+ qualifiedTableName.database, qualifiedTableName.name)(table, client, sparkSession)
+ alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable)
}
}
@@ -385,7 +387,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
// Read path
case relation: MetastoreRelation if shouldConvertMetastoreParquet(relation) =>
val parquetRelation = convertToParquetRelation(relation)
- SubqueryAlias(relation.alias.getOrElse(relation.tableName), parquetRelation)
+ SubqueryAlias(relation.tableName, parquetRelation)
}
}
}
@@ -423,7 +425,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
// Read path
case relation: MetastoreRelation if shouldConvertMetastoreOrc(relation) =>
val orcRelation = convertToOrcRelation(relation)
- SubqueryAlias(relation.alias.getOrElse(relation.tableName), orcRelation)
+ SubqueryAlias(relation.tableName, orcRelation)
}
}
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
index 58bca2059c..3ab1bdabb9 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
@@ -41,8 +41,7 @@ import org.apache.spark.sql.hive.client.HiveClient
private[hive] case class MetastoreRelation(
databaseName: String,
- tableName: String,
- alias: Option[String])
+ tableName: String)
(val catalogTable: CatalogTable,
@transient private val client: HiveClient,
@transient private val sparkSession: SparkSession)
@@ -52,13 +51,12 @@ private[hive] case class MetastoreRelation(
case relation: MetastoreRelation =>
databaseName == relation.databaseName &&
tableName == relation.tableName &&
- alias == relation.alias &&
output == relation.output
case _ => false
}
override def hashCode(): Int = {
- Objects.hashCode(databaseName, tableName, alias, output)
+ Objects.hashCode(databaseName, tableName, output)
}
override protected def otherCopyArgs: Seq[AnyRef] = catalogTable :: sparkSession :: Nil
@@ -208,7 +206,7 @@ private[hive] case class MetastoreRelation(
CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything is nullable.
nullable = true
- )(qualifier = Some(alias.getOrElse(tableName)))
+ )(qualifier = Some(tableName))
}
/** PartitionKey attributes */
@@ -243,6 +241,6 @@ private[hive] case class MetastoreRelation(
}
override def newInstance(): MetastoreRelation = {
- MetastoreRelation(databaseName, tableName, alias)(catalogTable, client, sparkSession)
+ MetastoreRelation(databaseName, tableName)(catalogTable, client, sparkSession)
}
}