From d6eede9a36766e2d2294951b054d7557008a5662 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Wed, 7 Sep 2016 09:36:53 +0800
Subject: [SPARK-17238][SQL] simplify the logic for converting data source
 table into hive compatible format

## What changes were proposed in this pull request?

Previously we had two conditions to decide whether a data source table is Hive-compatible:

1. the data source is file-based and has a corresponding Hive serde
2. there is a `path` entry in the data source options/storage properties

However, if condition 1 is true, condition 2 must be true too, because we put the default table path into the data source options/storage properties for managed data source tables.

There is also a potential issue: we set the `locationUri` even for managed tables. This PR removes condition 2 and only sets the `locationUri` for external data source tables. (A hedged sketch of the simplified decision flow follows the patch.)

Note: this is also a first step toward unifying the `path` of data source tables and the `locationUri` of Hive serde tables. For Hive serde tables, `locationUri` is only set for external tables. For data source tables, `path` is always set. We can make them consistent after this PR.

## How was this patch tested?

Existing tests.

Author: Wenchen Fan

Closes #14809 from cloud-fan/minor2.
---
 .../spark/sql/hive/HiveExternalCatalog.scala | 32 ++++++++++++----------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 2e127ef562..d35a681b67 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -249,10 +249,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     }
 
     // converts the table metadata to Hive compatible format, i.e. set the serde information.
-    def newHiveCompatibleMetastoreTable(serde: HiveSerDe, path: String): CatalogTable = {
+    def newHiveCompatibleMetastoreTable(serde: HiveSerDe): CatalogTable = {
+      val location = if (tableDefinition.tableType == EXTERNAL) {
+        // When we hit this branch, we are saving an external data source table with hive
+        // compatible format, which means the data source is file-based and must have a `path`.
+        val map = new CaseInsensitiveMap(tableDefinition.storage.properties)
+        require(map.contains("path"),
+          "External file-based data source table must have a `path` entry in storage properties.")
+        Some(new Path(map("path")).toUri.toString)
+      } else {
+        None
+      }
+
       tableDefinition.copy(
         storage = tableDefinition.storage.copy(
-          locationUri = Some(new Path(path).toUri.toString),
+          locationUri = location,
           inputFormat = serde.inputFormat,
           outputFormat = serde.outputFormat,
           serde = serde.serde
@@ -262,11 +273,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
 
     val qualifiedTableName = tableDefinition.identifier.quotedString
     val maybeSerde = HiveSerDe.sourceToSerDe(tableDefinition.provider.get)
-    val maybePath = new CaseInsensitiveMap(tableDefinition.storage.properties).get("path")
     val skipHiveMetadata = tableDefinition.storage.properties
       .getOrElse("skipHiveMetadata", "false").toBoolean
 
-    val (hiveCompatibleTable, logMessage) = (maybeSerde, maybePath) match {
+    val (hiveCompatibleTable, logMessage) = maybeSerde match {
       case _ if skipHiveMetadata =>
         val message =
           s"Persisting data source table $qualifiedTableName into Hive metastore in" +
@@ -280,17 +290,11 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
           "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive. "
         (None, message)
 
-      case (Some(serde), Some(path)) =>
+      case Some(serde) =>
         val message =
-          s"Persisting file based data source table $qualifiedTableName with an input path " +
-          s"into Hive metastore in Hive compatible format."
-        (Some(newHiveCompatibleMetastoreTable(serde, path)), message)
-
-      case (Some(_), None) =>
-        val message =
-          s"Data source table $qualifiedTableName is not file based. Persisting it into " +
-          s"Hive metastore in Spark SQL specific format, which is NOT compatible with Hive."
-        (None, message)
+          s"Persisting file based data source table $qualifiedTableName into " +
+          s"Hive metastore in Hive compatible format."
+        (Some(newHiveCompatibleMetastoreTable(serde)), message)
 
       case _ =>
         val provider = tableDefinition.provider.get
-- 
cgit v1.2.3
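
For readers skimming the patch, here is a minimal, self-contained Scala sketch of the decision flow after this change. The `TableDef`, `HiveSerDe`, and `sourceToSerDe` names below are simplified stand-ins invented for illustration, not Spark's real `CatalogTable` or `HiveSerDe.sourceToSerDe` APIs:

```scala
// A hedged sketch of the simplified hive-compatibility logic, under the
// assumption (stated in the PR description) that any provider with a Hive
// serde is file-based, and that managed tables always carry a `path`.
object HiveCompatibilitySketch {

  sealed trait TableType
  case object MANAGED extends TableType
  case object EXTERNAL extends TableType

  final case class HiveSerDe(inputFormat: String, outputFormat: String, serde: String)

  final case class TableDef(
      tableType: TableType,
      provider: String,
      storageProperties: Map[String, String])

  // Stand-in for HiveSerDe.sourceToSerDe: only file-based providers map to a serde.
  def sourceToSerDe(provider: String): Option[HiveSerDe] = provider.toLowerCase match {
    case "parquet" => Some(HiveSerDe(
      "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
      "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
      "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
    case _ => None // e.g. jdbc: not file-based, so no Hive-compatible serde
  }

  // The single remaining condition: does the provider have a Hive serde?
  // If yes, the table is file-based and the old second `path` check is redundant.
  // `locationUri` is derived only for EXTERNAL tables; managed tables leave it unset.
  def hiveCompatibleLocation(table: TableDef): Either[String, Option[String]] =
    sourceToSerDe(table.provider) match {
      case Some(_) =>
        val locationUri = table.tableType match {
          case EXTERNAL =>
            // External file-based tables must carry an explicit `path`.
            Some(table.storageProperties.getOrElse("path",
              throw new IllegalArgumentException(
                "External file-based data source table must have a `path` entry.")))
          case MANAGED => None
        }
        Right(locationUri)
      case None =>
        Left(s"Provider `${table.provider}` has no Hive serde; persisting " +
          "in Spark SQL specific format, which is NOT compatible with Hive.")
    }

  def main(args: Array[String]): Unit = {
    println(hiveCompatibleLocation(TableDef(EXTERNAL, "parquet", Map("path" -> "/data/t1"))))
    println(hiveCompatibleLocation(TableDef(MANAGED, "parquet", Map("path" -> "/warehouse/t2"))))
    println(hiveCompatibleLocation(TableDef(MANAGED, "jdbc", Map.empty)))
  }
}
```

Running `main` exercises the three cases: the external parquet table yields a `locationUri`, the managed parquet table stays Hive-compatible with no `locationUri`, and the JDBC table falls back to the Spark SQL specific format.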