author    Wenchen Fan <wenchen@databricks.com>  2016-09-07 09:36:53 +0800
committer Wenchen Fan <wenchen@databricks.com>  2016-09-07 09:36:53 +0800
commit    d6eede9a36766e2d2294951b054d7557008a5662 (patch)
tree      e43c39c8d7b4e79b0d973d41abffc10dfbe06bfa /sql/hive
parent    a40657bfd375bd27d65204bb42ed0cbd7bd1ebf2 (diff)
[SPARK-17238][SQL] simplify the logic for converting data source table into hive compatible format
## What changes were proposed in this pull request?

Previously we had two conditions for deciding whether a data source table is Hive-compatible:

1. the data source is file-based and has a corresponding Hive serde
2. the data source options/storage properties contain a `path` entry

However, if condition 1 holds, condition 2 must hold too, because we put the default table path into the data source options/storage properties even for managed data source tables. There is also a potential issue: we set the `locationUri` even for managed tables. This PR removes condition 2 and sets the `locationUri` only for external data source tables. A sketch of the resulting branching follows below.

Note: this is also a first step toward unifying the `path` of data source tables and the `locationUri` of Hive serde tables. For Hive serde tables, `locationUri` is set only for external tables; for data source tables, `path` is always set. We can make them consistent after this PR.

## How was this patch tested?

Existing tests.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #14809 from cloud-fan/minor2.
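For illustration, here is a minimal, runnable Scala sketch of the new branching, not the actual Spark code: the `Table`/`Storage` case classes and `hiveCompatibleLocation` are hypothetical stand-ins for Spark's `CatalogTable`/`CatalogStorageFormat`, `java.net.URI` stands in for Hadoop's `Path`, and a lower-cased-key map stands in for `CaseInsensitiveMap`. The point it shows: `locationUri` is derived only for EXTERNAL tables, and the old `path` precondition survives as a `require` inside that branch rather than as a separate match condition.

```scala
// Sketch only: simplified stand-ins for CatalogTable / CatalogStorageFormat.
object HiveCompatSketch {
  sealed trait TableType
  case object MANAGED extends TableType
  case object EXTERNAL extends TableType

  case class Storage(properties: Map[String, String], locationUri: Option[String] = None)
  case class Table(tableType: TableType, storage: Storage)

  // Only external tables get a locationUri; managed tables leave it unset.
  def hiveCompatibleLocation(table: Table): Option[String] = table.tableType match {
    case EXTERNAL =>
      // An external file-based data source table must carry a `path` entry.
      // Lower-casing keys approximates Spark's CaseInsensitiveMap lookup.
      val props = table.storage.properties.map { case (k, v) => (k.toLowerCase, v) }
      require(props.contains("path"),
        "External file-based data source table must have a `path` entry in storage properties.")
      Some(new java.net.URI(props("path")).toString)
    case MANAGED =>
      None
  }

  def main(args: Array[String]): Unit = {
    val external = Table(EXTERNAL, Storage(Map("path" -> "/data/t1")))
    val managed  = Table(MANAGED,  Storage(Map("path" -> "/warehouse/t2")))
    println(hiveCompatibleLocation(external)) // Some(/data/t1)
    println(hiveCompatibleLocation(managed))  // None
  }
}
```

With the `path` check folded into the EXTERNAL branch, the caller's match needs only `maybeSerde` (rather than `(maybeSerde, maybePath)`), which is exactly the simplification visible in the diff below.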
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala | 32
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 2e127ef562..d35a681b67 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -249,10 +249,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
}
// converts the table metadata to Hive compatible format, i.e. set the serde information.
- def newHiveCompatibleMetastoreTable(serde: HiveSerDe, path: String): CatalogTable = {
+ def newHiveCompatibleMetastoreTable(serde: HiveSerDe): CatalogTable = {
+ val location = if (tableDefinition.tableType == EXTERNAL) {
+ // When we hit this branch, we are saving an external data source table with hive
+ // compatible format, which means the data source is file-based and must have a `path`.
+ val map = new CaseInsensitiveMap(tableDefinition.storage.properties)
+ require(map.contains("path"),
+ "External file-based data source table must have a `path` entry in storage properties.")
+ Some(new Path(map("path")).toUri.toString)
+ } else {
+ None
+ }
+
tableDefinition.copy(
storage = tableDefinition.storage.copy(
- locationUri = Some(new Path(path).toUri.toString),
+ locationUri = location,
inputFormat = serde.inputFormat,
outputFormat = serde.outputFormat,
serde = serde.serde
@@ -262,11 +273,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
val qualifiedTableName = tableDefinition.identifier.quotedString
val maybeSerde = HiveSerDe.sourceToSerDe(tableDefinition.provider.get)
- val maybePath = new CaseInsensitiveMap(tableDefinition.storage.properties).get("path")
val skipHiveMetadata = tableDefinition.storage.properties
.getOrElse("skipHiveMetadata", "false").toBoolean
- val (hiveCompatibleTable, logMessage) = (maybeSerde, maybePath) match {
+ val (hiveCompatibleTable, logMessage) = maybeSerde match {
case _ if skipHiveMetadata =>
val message =
s"Persisting data source table $qualifiedTableName into Hive metastore in" +
@@ -280,17 +290,11 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
"Hive metastore in Spark SQL specific format, which is NOT compatible with Hive. "
(None, message)
- case (Some(serde), Some(path)) =>
+ case Some(serde) =>
val message =
- s"Persisting file based data source table $qualifiedTableName with an input path " +
- s"into Hive metastore in Hive compatible format."
- (Some(newHiveCompatibleMetastoreTable(serde, path)), message)
-
- case (Some(_), None) =>
- val message =
- s"Data source table $qualifiedTableName is not file based. Persisting it into " +
- s"Hive metastore in Spark SQL specific format, which is NOT compatible with Hive."
- (None, message)
+ s"Persisting file based data source table $qualifiedTableName into " +
+ s"Hive metastore in Hive compatible format."
+ (Some(newHiveCompatibleMetastoreTable(serde)), message)
case _ =>
val provider = tableDefinition.provider.get