author    Reynold Xin <rxin@databricks.com>  2016-05-25 23:54:24 -0700
committer Reynold Xin <rxin@databricks.com>  2016-05-25 23:54:24 -0700
commit    361ebc282b2d09dc6dcf21419a53c5c617b1b6bd (patch)
tree      0ed7e06fed5e03fec1516386bb16005b3bbc677e /sql/hive/src/main
parent    dfc9fc02ccbceb09213c394177d54b9ca56b6f24 (diff)
[SPARK-15543][SQL] Rename DefaultSources to make them more self-describing
## What changes were proposed in this pull request?

This patch renames various DefaultSources to make their names more self-describing. The choice of "DefaultSource" was from the days when we did not have a good way to specify short names. They are now named:

- LibSVMFileFormat
- CSVFileFormat
- JdbcRelationProvider
- JsonFileFormat
- ParquetFileFormat
- TextFileFormat

Backward compatibility is maintained through aliasing.

## How was this patch tested?

Updated relevant test cases too.

Author: Reynold Xin <rxin@databricks.com>

Closes #13311 from rxin/SPARK-15543.
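The aliasing mentioned above is the name-to-class mapping that the OrcFileFormat scaladoc below calls `DataSource`'s backwardCompatibilityMap: legacy "DefaultSource" class names resolve to the renamed classes. A minimal sketch of that idea in Scala — the `DataSourceAliases` object and its map contents are illustrative, based only on the renames listed in this commit, not Spark's actual code:

```scala
// Sketch of backward-compatibility aliasing: legacy "DefaultSource" class
// names resolve to the renamed FileFormat classes. Illustrative only.
object DataSourceAliases {
  private val backwardCompatibilityMap: Map[String, String] = Map(
    "org.apache.spark.sql.hive.orc.DefaultSource" ->
      "org.apache.spark.sql.hive.orc.OrcFileFormat",
    "org.apache.spark.sql.execution.datasources.parquet.DefaultSource" ->
      "org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat"
  )

  /** Resolve a possibly-legacy provider class name to its current name. */
  def resolveProvider(name: String): String =
    backwardCompatibilityMap.getOrElse(name, name)
}
```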
Diffstat (limited to 'sql/hive/src/main')
-rw-r--r-- sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister |  2
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala                  | 18
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala (renamed from sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala) |  8
3 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
index 4a774fbf1f..32aa13ff25 100644
--- a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -1 +1 @@
-org.apache.spark.sql.hive.orc.DefaultSource
+org.apache.spark.sql.hive.orc.OrcFileFormat
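The file changed above is a standard `java.util.ServiceLoader` registration: Spark discovers `DataSourceRegister` implementations on the classpath from such META-INF/services entries and matches them by `shortName()`, which is why the entry must name the concrete class. A minimal sketch of that lookup — `ShortNameLookup` and `lookupByShortName` are hypothetical helpers for illustration, not Spark's actual API:

```scala
import java.util.ServiceLoader

import scala.collection.JavaConverters._

import org.apache.spark.sql.sources.DataSourceRegister

object ShortNameLookup {
  // Scan the DataSourceRegister implementations declared in META-INF/services
  // files (like the one edited above) and pick the one whose short name
  // matches, e.g. "orc".
  def lookupByShortName(name: String): Option[DataSourceRegister] =
    ServiceLoader.load(classOf[DataSourceRegister]).asScala
      .find(_.shortName().equalsIgnoreCase(name))
}
```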
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 86ab152402..b377a20e39 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -32,8 +32,8 @@ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.execution.command.CreateTableAsSelectLogicalPlan
import org.apache.spark.sql.execution.datasources.{Partition => _, _}
-import org.apache.spark.sql.execution.datasources.parquet.{DefaultSource => ParquetDefaultSource, ParquetRelation}
-import org.apache.spark.sql.hive.orc.{DefaultSource => OrcDefaultSource}
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.hive.orc.OrcFileFormat
import org.apache.spark.sql.types._
@@ -281,7 +281,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
val inferredSchema =
defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles())
inferredSchema.map { inferred =>
- ParquetRelation.mergeMetastoreParquetSchema(metastoreSchema, inferred)
+ ParquetFileFormat.mergeMetastoreParquetSchema(metastoreSchema, inferred)
}.getOrElse(metastoreSchema)
} else {
defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles()).get
@@ -348,13 +348,13 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
}
private def convertToParquetRelation(relation: MetastoreRelation): LogicalRelation = {
- val defaultSource = new ParquetDefaultSource()
- val fileFormatClass = classOf[ParquetDefaultSource]
+ val defaultSource = new ParquetFileFormat()
+ val fileFormatClass = classOf[ParquetFileFormat]
val mergeSchema = sessionState.convertMetastoreParquetWithSchemaMerging
val options = Map(
- ParquetRelation.MERGE_SCHEMA -> mergeSchema.toString,
- ParquetRelation.METASTORE_TABLE_NAME -> TableIdentifier(
+ ParquetFileFormat.MERGE_SCHEMA -> mergeSchema.toString,
+ ParquetFileFormat.METASTORE_TABLE_NAME -> TableIdentifier(
relation.tableName,
Some(relation.databaseName)
).unquotedString
@@ -400,8 +400,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
}
private def convertToOrcRelation(relation: MetastoreRelation): LogicalRelation = {
- val defaultSource = new OrcDefaultSource()
- val fileFormatClass = classOf[OrcDefaultSource]
+ val defaultSource = new OrcFileFormat()
+ val fileFormatClass = classOf[OrcFileFormat]
val options = Map[String, String]()
convertToLogicalRelation(relation, options, defaultSource, fileFormatClass, "orc")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 38f50c112a..f1198179a0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -42,7 +42,11 @@ import org.apache.spark.sql.sources.{Filter, _}
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration
-private[sql] class DefaultSource
+/**
+ * [[FileFormat]] for reading ORC files. If this is moved or renamed, please update
+ * [[DataSource]]'s backwardCompatibilityMap.
+ */
+private[sql] class OrcFileFormat
extends FileFormat with DataSourceRegister with Serializable {
override def shortName(): String = "orc"
@@ -262,7 +266,7 @@ private[orc] case class OrcTableScan(
// Figure out the actual schema from the ORC source (without partition columns) so that we
// can pick the correct ordinals. Note that this assumes that all files have the same schema.
- val orcFormat = new DefaultSource
+ val orcFormat = new OrcFileFormat
val dataSchema =
orcFormat
.inferSchema(sparkSession, Map.empty, inputPaths)
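Because the renamed class keeps the "orc" short name (and the old class name remains aliased), user code that addresses the source by name is unaffected by this patch. A small usage sketch, with a placeholder path:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("orc-read-example").getOrCreate()
// The "orc" short name still resolves to OrcFileFormat after the rename.
val df = spark.read.format("orc").load("/path/to/data.orc")
```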