author:    Reynold Xin <rxin@databricks.com>  2016-11-03 11:48:05 -0700
committer: Reynold Xin <rxin@databricks.com>  2016-11-03 11:48:05 -0700
commit:    b17057c0a69b9c56e503483d97f5dc209eef0884 (patch)
tree:      dc36d1f309c2deca27d585c6e4b57e5f2989515a /sql/core/src/main
parent:    27daf6bcde782ed3e0f0d951c90c8040fd47e985 (diff)
[SPARK-18244][SQL] Rename partitionProviderIsHive -> tracksPartitionsInCatalog
## What changes were proposed in this pull request?
This patch renames `partitionProviderIsHive` to `tracksPartitionsInCatalog`, as the old name was too Hive-specific.
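For illustration, here is a minimal, self-contained Scala sketch of what the renamed flag expresses. `TableMetadata` and `RenameSketch` are hypothetical, simplified stand-ins introduced only for this example; the real field lives on Spark's `CatalogTable`.

```scala
// Hypothetical, simplified stand-in for CatalogTable; the real class lives in
// org.apache.spark.sql.catalyst.catalog and has many more fields.
case class TableMetadata(
    identifier: String,
    partitionColumnNames: Seq[String],
    // Previously named `partitionProviderIsHive`; the new name describes the
    // behaviour (partition metadata is tracked in the catalog) rather than a
    // particular metastore implementation.
    tracksPartitionsInCatalog: Boolean = false)

object RenameSketch extends App {
  // Per the ddl.scala hunk below, a data source table created before Spark 2.1
  // does not yet track its partitions in the catalog...
  val legacy = TableMetadata("db.logs", Seq("ds"), tracksPartitionsInCatalog = false)
  // ...and AlterTableRecoverPartitionsCommand (`msck repair table`) flips the
  // flag once the partition metadata has been imported.
  val recovered = legacy.copy(tracksPartitionsInCatalog = true)
  println(recovered)
}
```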
## How was this patch tested?
Should be covered by existing tests.
Author: Reynold Xin <rxin@databricks.com>
Closes #15750 from rxin/SPARK-18244.
Diffstat (limited to 'sql/core/src/main')
6 files changed, 10 insertions, 13 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
index d4b28274cc..7e16e43f2b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -92,7 +92,7 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
       // If metastore partition management for file source tables is enabled, we start off with
       // partition provider hive, but no partitions in the metastore. The user has to call
       // `msck repair table` to populate the table partitions.
-      partitionProviderIsHive = partitionColumnNames.nonEmpty &&
+      tracksPartitionsInCatalog = partitionColumnNames.nonEmpty &&
         sparkSession.sessionState.conf.manageFilesourcePartitions)
     // We will return Nil or throw exception at the beginning if the table already exists, so when
     // we reach here, the table should not exist and we should set `ignoreIfExists` to false.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 52af915b0b..b4d3ca1f37 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -523,7 +523,7 @@ case class AlterTableRecoverPartitionsCommand(
     // Updates the table to indicate that its partition metadata is stored in the Hive metastore.
     // This is always the case for Hive format tables, but is not true for Datasource tables created
     // before Spark 2.1 unless they are converted via `msck repair table`.
-    spark.sessionState.catalog.alterTable(table.copy(partitionProviderIsHive = true))
+    spark.sessionState.catalog.alterTable(table.copy(tracksPartitionsInCatalog = true))
     catalog.refreshTable(tableName)
     logInfo(s"Recovered all partitions ($total).")
     Seq.empty[Row]
@@ -702,7 +702,7 @@ object DDLUtils {
         s"$action is not allowed on $tableName since filesource partition management is " +
           "disabled (spark.sql.hive.manageFilesourcePartitions = false).")
     }
-    if (!table.partitionProviderIsHive && isDatasourceTable(table)) {
+    if (!table.tracksPartitionsInCatalog && isDatasourceTable(table)) {
       throw new AnalysisException(
         s"$action is not allowed on $tableName since its partition metadata is not stored in " +
           "the Hive metastore. To import this information into the metastore, run " +
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index f32c956f59..00c646b918 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -467,7 +467,7 @@ case class DescribeTableCommand(

     if (table.tableType == CatalogTableType.VIEW) describeViewInfo(table, buffer)

-    if (DDLUtils.isDatasourceTable(table) && table.partitionProviderIsHive) {
+    if (DDLUtils.isDatasourceTable(table) && table.tracksPartitionsInCatalog) {
       append(buffer, "Partition Provider:", "Hive", "")
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 0b50448a7a..5266611935 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -308,7 +308,7 @@ case class DataSource(
     }

     val fileCatalog = if (sparkSession.sqlContext.conf.manageFilesourcePartitions &&
-        catalogTable.isDefined && catalogTable.get.partitionProviderIsHive) {
+        catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog) {
       new CatalogFileIndex(
         sparkSession,
         catalogTable.get,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
index e87998fe4a..a548e88cb6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
@@ -182,9 +182,10 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {
           "Cannot overwrite a path that is also being read from.")
       }

-      val overwritingSinglePartition = (overwrite.specificPartition.isDefined &&
+      val overwritingSinglePartition =
+        overwrite.specificPartition.isDefined &&
         t.sparkSession.sessionState.conf.manageFilesourcePartitions &&
-        l.catalogTable.get.partitionProviderIsHive)
+        l.catalogTable.get.tracksPartitionsInCatalog

       val effectiveOutputPath = if (overwritingSinglePartition) {
         val partition = t.sparkSession.sessionState.catalog.getPartition(
@@ -203,7 +204,7 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {
       def refreshPartitionsCallback(updatedPartitions: Seq[TablePartitionSpec]): Unit = {
         if (l.catalogTable.isDefined && updatedPartitions.nonEmpty &&
           l.catalogTable.get.partitionColumnNames.nonEmpty &&
-          l.catalogTable.get.partitionProviderIsHive) {
+          l.catalogTable.get.tracksPartitionsInCatalog) {
           val metastoreUpdater = AlterTableAddPartitionCommand(
             l.catalogTable.get.identifier,
             updatedPartitions.map(p => (p, None)),
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
index 927c0c5b95..9c75e2ae74 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
@@ -31,11 +31,7 @@ import org.apache.spark.sql.execution.command.RunnableCommand

 /**
  * A command for writing data to a [[HadoopFsRelation]]. Supports both overwriting and appending.
- * Writing to dynamic partitions is also supported. Each [[InsertIntoHadoopFsRelationCommand]]
- * issues a single write job, and owns a UUID that identifies this job. Each concrete
- * implementation of [[HadoopFsRelation]] should use this UUID together with task id to generate
- * unique file path for each task output file. This UUID is passed to executor side via a
- * property named `spark.sql.sources.writeJobUUID`.
+ * Writing to dynamic partitions is also supported.
  */
 case class InsertIntoHadoopFsRelationCommand(
     outputPath: Path,
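For context, a hedged end-to-end sketch of the behaviour this flag controls, built only from the commands and configuration names that appear in the diff above. The table name, schema, and app name are placeholders, and a Hive-enabled Spark 2.1+ session is assumed; this is not part of the patch itself.

```scala
import org.apache.spark.sql.SparkSession

object PartitionTrackingSketch extends App {
  val spark = SparkSession.builder()
    .appName("tracksPartitionsInCatalog-sketch")
    .enableHiveSupport()
    // Named in the DDLUtils hunk above; when false, partition-management
    // commands on data source tables are rejected with an AnalysisException.
    .config("spark.sql.hive.manageFilesourcePartitions", "true")
    .getOrCreate()

  // A partitioned data source table (placeholder name and schema).
  spark.sql(
    "CREATE TABLE logs (msg STRING, ds STRING) USING parquet PARTITIONED BY (ds)")

  // AlterTableRecoverPartitionsCommand imports partition metadata into the
  // catalog and sets tracksPartitionsInCatalog = true on the table metadata.
  spark.sql("MSCK REPAIR TABLE logs")

  // DescribeTableCommand then reports "Partition Provider: Hive" for such tables.
  spark.sql("DESCRIBE FORMATTED logs").show(100, truncate = false)

  spark.stop()
}
```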