author    Reynold Xin <rxin@databricks.com>  2016-05-25 23:54:24 -0700
committer Reynold Xin <rxin@databricks.com>  2016-05-25 23:54:24 -0700
commit    361ebc282b2d09dc6dcf21419a53c5c617b1b6bd (patch)
tree      0ed7e06fed5e03fec1516386bb16005b3bbc677e /sql/hive/src/main
parent    dfc9fc02ccbceb09213c394177d54b9ca56b6f24 (diff)
[SPARK-15543][SQL] Rename DefaultSources to make them more self-describing
## What changes were proposed in this pull request?

This patch renames various DefaultSources to make their names more self-describing. The choice of "DefaultSource" was from the days when we did not have a good way to specify short names. They are now named:

- LibSVMFileFormat
- CSVFileFormat
- JdbcRelationProvider
- JsonFileFormat
- ParquetFileFormat
- TextFileFormat

Backward compatibility is maintained through aliasing.

## How was this patch tested?

Updated relevant test cases too.

Author: Reynold Xin <rxin@databricks.com>

Closes #13311 from rxin/SPARK-15543.
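The aliasing mentioned above is the name-to-class mapping that the OrcFileFormat scaladoc below calls `DataSource`'s backwardCompatibilityMap: legacy "DefaultSource" class names resolve to the renamed classes. A minimal sketch of that idea in Scala — the `DataSourceAliases` object and its map contents are illustrative, based only on the renames listed in this commit, not Spark's actual code:

```scala
// Sketch of backward-compatibility aliasing: legacy "DefaultSource" class
// names resolve to the renamed FileFormat classes. Illustrative only.
object DataSourceAliases {
  private val backwardCompatibilityMap: Map[String, String] = Map(
    "org.apache.spark.sql.hive.orc.DefaultSource" ->
      "org.apache.spark.sql.hive.orc.OrcFileFormat",
    "org.apache.spark.sql.execution.datasources.parquet.DefaultSource" ->
      "org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat"
  )

  /** Resolve a possibly-legacy provider class name to its current name. */
  def resolveProvider(name: String): String =
    backwardCompatibilityMap.getOrElse(name, name)
}
```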
Diffstat (limited to 'sql/hive/src/main')
-rw-r--r-- sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister |  2
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala                  | 18
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala (renamed from sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala) |  8
3 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
index 4a774fbf1f..32aa13ff25 100644
--- a/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ b/sql/hive/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -1 +1 @@
-org.apache.spark.sql.hive.orc.DefaultSource
+org.apache.spark.sql.hive.orc.OrcFileFormat
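The file changed above is a standard `java.util.ServiceLoader` registration: Spark discovers `DataSourceRegister` implementations on the classpath from such META-INF/services entries and matches them by `shortName()`, which is why the entry must name the concrete class. A minimal sketch of that lookup — `ShortNameLookup` and `lookupByShortName` are hypothetical helpers for illustration, not Spark's actual API:

```scala
import java.util.ServiceLoader

import scala.collection.JavaConverters._

import org.apache.spark.sql.sources.DataSourceRegister

object ShortNameLookup {
  // Scan the DataSourceRegister implementations declared in META-INF/services
  // files (like the one edited above) and pick the one whose short name
  // matches, e.g. "orc".
  def lookupByShortName(name: String): Option[DataSourceRegister] =
    ServiceLoader.load(classOf[DataSourceRegister]).asScala
      .find(_.shortName().equalsIgnoreCase(name))
}
```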
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 86ab152402..b377a20e39 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -32,8 +32,8 @@ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.execution.command.CreateTableAsSelectLogicalPlan
import org.apache.spark.sql.execution.datasources.{Partition => _, _}
-import org.apache.spark.sql.execution.datasources.parquet.{DefaultSource => ParquetDefaultSource, ParquetRelation}
-import org.apache.spark.sql.hive.orc.{DefaultSource => OrcDefaultSource}
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.hive.orc.OrcFileFormat
import org.apache.spark.sql.types._
@@ -281,7 +281,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
val inferredSchema =
defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles())
inferredSchema.map { inferred =>
- ParquetRelation.mergeMetastoreParquetSchema(metastoreSchema, inferred)
+ ParquetFileFormat.mergeMetastoreParquetSchema(metastoreSchema, inferred)
}.getOrElse(metastoreSchema)
} else {
defaultSource.inferSchema(sparkSession, options, fileCatalog.allFiles()).get
@@ -348,13 +348,13 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
}
private def convertToParquetRelation(relation: MetastoreRelation): LogicalRelation = {
- val defaultSource = new ParquetDefaultSource()
- val fileFormatClass = classOf[ParquetDefaultSource]
+ val defaultSource = new ParquetFileFormat()
+ val fileFormatClass = classOf[ParquetFileFormat]
val mergeSchema = sessionState.convertMetastoreParquetWithSchemaMerging
val options = Map(
- ParquetRelation.MERGE_SCHEMA -> mergeSchema.toString,
- ParquetRelation.METASTORE_TABLE_NAME -> TableIdentifier(
+ ParquetFileFormat.MERGE_SCHEMA -> mergeSchema.toString,
+ ParquetFileFormat.METASTORE_TABLE_NAME -> TableIdentifier(
relation.tableName,
Some(relation.databaseName)
).unquotedString
@@ -400,8 +400,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
}
private def convertToOrcRelation(relation: MetastoreRelation): LogicalRelation = {
- val defaultSource = new OrcDefaultSource()
- val fileFormatClass = classOf[OrcDefaultSource]
+ val defaultSource = new OrcFileFormat()
+ val fileFormatClass = classOf[OrcFileFormat]
val options = Map[String, String]()
convertToLogicalRelation(relation, options, defaultSource, fileFormatClass, "orc")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 38f50c112a..f1198179a0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -42,7 +42,11 @@ import org.apache.spark.sql.sources.{Filter, _}
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration
-private[sql] class DefaultSource
+/**
+ * [[FileFormat]] for reading ORC files. If this is moved or renamed, please update
+ * [[DataSource]]'s backwardCompatibilityMap.
+ */
+private[sql] class OrcFileFormat
extends FileFormat with DataSourceRegister with Serializable {
override def shortName(): String = "orc"
@@ -262,7 +266,7 @@ private[orc] case class OrcTableScan(
// Figure out the actual schema from the ORC source (without partition columns) so that we
// can pick the correct ordinals. Note that this assumes that all files have the same schema.
- val orcFormat = new DefaultSource
+ val orcFormat = new OrcFileFormat
val dataSchema =
orcFormat
.inferSchema(sparkSession, Map.empty, inputPaths)
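Because the renamed class keeps the "orc" short name (and the old class name remains aliased), user code that addresses the source by name is unaffected by this patch. A small usage sketch, with a placeholder path:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("orc-read-example").getOrCreate()
// The "orc" short name still resolves to OrcFileFormat after the rename.
val df = spark.read.format("orc").load("/path/to/data.orc")
```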