- author: Reynold Xin <rxin@databricks.com>, 2016-05-25 23:54:24 -0700
- committer: Reynold Xin <rxin@databricks.com>, 2016-05-25 23:54:24 -0700
- commit: 361ebc282b2d09dc6dcf21419a53c5c617b1b6bd
- tree: 0ed7e06fed5e03fec1516386bb16005b3bbc677e /sql/hive/src/main/scala/org/apache
- parent: dfc9fc02ccbceb09213c394177d54b9ca56b6f24
[SPARK-15543][SQL] Rename DefaultSources to make them more self-describing
## What changes were proposed in this pull request?
This patch renames the various `DefaultSource` classes to make their names self-describing. The name "DefaultSource" dates back to the days when we had no good way to specify short names for data sources.
They are now named:
- LibSVMFileFormat
- CSVFileFormat
- JdbcRelationProvider
- JsonFileFormat
- ParquetFileFormat
- TextFileFormat
Backward compatibility is maintained through aliasing.
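The aliasing is just a name-to-name lookup: when a user passes one of the old fully-qualified `DefaultSource` names, it is rewritten to the renamed class before resolution. A minimal sketch of the idea in Scala (the real mapping lives in `DataSource`'s backwardCompatibilityMap, as the scaladoc in the diff below notes; the exact entries and the `resolveProvider` helper here are illustrative):

```scala
// Illustrative alias map: old fully-qualified provider names point at the
// renamed classes. The real map lives in
// org.apache.spark.sql.execution.datasources.DataSource.
val backwardCompatibilityMap: Map[String, String] = Map(
  "org.apache.spark.sql.hive.orc.DefaultSource" ->
    "org.apache.spark.sql.hive.orc.OrcFileFormat",
  "org.apache.spark.sql.execution.datasources.parquet.DefaultSource" ->
    "org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat",
  "org.apache.spark.sql.execution.datasources.json.DefaultSource" ->
    "org.apache.spark.sql.execution.datasources.json.JsonFileFormat"
)

// Fall back to the name as given when no alias exists.
def resolveProvider(provider: String): String =
  backwardCompatibilityMap.getOrElse(provider, provider)
```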
## How was this patch tested?
Updated the relevant test cases.
Author: Reynold Xin <rxin@databricks.com>
Closes #13311 from rxin/SPARK-15543.
Diffstat (limited to 'sql/hive/src/main/scala/org/apache')
| file | lines changed |
|---|---|
| sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 18 |
| sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala (renamed from sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala) | 8 |
2 files changed, 15 insertions, 11 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 86ab152402..b377a20e39 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -32,8 +32,8 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.execution.command.CreateTableAsSelectLogicalPlan
 import org.apache.spark.sql.execution.datasources.{Partition => _, _}
-import org.apache.spark.sql.execution.datasources.parquet.{DefaultSource => ParquetDefaultSource, ParquetRelation}
-import org.apache.spark.sql.hive.orc.{DefaultSource => OrcDefaultSource}
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.hive.orc.OrcFileFormat
 import org.apache.spark.sql.types._
@@ -281,7 +281,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
       val inferredSchema = defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles())
       inferredSchema.map { inferred =>
-        ParquetRelation.mergeMetastoreParquetSchema(metastoreSchema, inferred)
+        ParquetFileFormat.mergeMetastoreParquetSchema(metastoreSchema, inferred)
       }.getOrElse(metastoreSchema)
     } else {
       defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles()).get
@@ -348,13 +348,13 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
   }

   private def convertToParquetRelation(relation: MetastoreRelation): LogicalRelation = {
-    val defaultSource = new ParquetDefaultSource()
-    val fileFormatClass = classOf[ParquetDefaultSource]
+    val defaultSource = new ParquetFileFormat()
+    val fileFormatClass = classOf[ParquetFileFormat]

     val mergeSchema = sessionState.convertMetastoreParquetWithSchemaMerging
     val options = Map(
-      ParquetRelation.MERGE_SCHEMA -> mergeSchema.toString,
-      ParquetRelation.METASTORE_TABLE_NAME -> TableIdentifier(
+      ParquetFileFormat.MERGE_SCHEMA -> mergeSchema.toString,
+      ParquetFileFormat.METASTORE_TABLE_NAME -> TableIdentifier(
        relation.tableName,
        Some(relation.databaseName)
      ).unquotedString
@@ -400,8 +400,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
   }

   private def convertToOrcRelation(relation: MetastoreRelation): LogicalRelation = {
-    val defaultSource = new OrcDefaultSource()
-    val fileFormatClass = classOf[OrcDefaultSource]
+    val defaultSource = new OrcFileFormat()
+    val fileFormatClass = classOf[OrcFileFormat]
     val options = Map[String, String]()

     convertToLogicalRelation(relation, options, defaultSource, fileFormatClass, "orc")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 38f50c112a..f1198179a0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -42,7 +42,11 @@ import org.apache.spark.sql.sources.{Filter, _}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.SerializableConfiguration

-private[sql] class DefaultSource
+/**
+ * [[FileFormat]] for reading ORC files. If this is moved or renamed, please update
+ * [[DataSource]]'s backwardCompatibilityMap.
+ */
+private[sql] class OrcFileFormat
   extends FileFormat with DataSourceRegister with Serializable {

   override def shortName(): String = "orc"
@@ -262,7 +266,7 @@ private[orc] case class OrcTableScan(
     // Figure out the actual schema from the ORC source (without partition columns) so that we
     // can pick the correct ordinals. Note that this assumes that all files have the same schema.
-    val orcFormat = new DefaultSource
+    val orcFormat = new OrcFileFormat
     val dataSchema = orcFormat
       .inferSchema(sparkSession, Map.empty, inputPaths)
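Because `OrcFileFormat` still registers the short name `"orc"` via `DataSourceRegister`, and the old class name is aliased as described above, both spellings keep working. A hedged usage sketch (assumes a `SparkSession` with Hive support, since this ORC `FileFormat` lives in the sql/hive module; the app name and input path are hypothetical):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("orc-rename-compat")
  .enableHiveSupport() // OrcFileFormat ships in the sql/hive module
  .getOrCreate()

// New style: resolved through DataSourceRegister.shortName(), i.e. "orc".
val byShortName = spark.read.format("orc").load("/tmp/events.orc")

// Old style: the pre-rename class name still resolves through the alias.
val byOldName = spark.read
  .format("org.apache.spark.sql.hive.orc.DefaultSource")
  .load("/tmp/events.orc")
```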