aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2016-12-08 12:52:05 -0800
committerReynold Xin <rxin@databricks.com>2016-12-08 12:52:05 -0800
commit5f894d23a54ea99f75f8b722e111e5270f7f80cf (patch)
tree5ac390b17b41c90c02c21702533826033b676cf1
parent26432df9cc6ffe569583aa628c6ecd7050b38316 (diff)
downloadspark-5f894d23a54ea99f75f8b722e111e5270f7f80cf.tar.gz
spark-5f894d23a54ea99f75f8b722e111e5270f7f80cf.tar.bz2
spark-5f894d23a54ea99f75f8b722e111e5270f7f80cf.zip
[SPARK-18760][SQL] Consistent format specification for FileFormats
## What changes were proposed in this pull request? This patch fixes the format specification in explain for file sources (Parquet and Text formats are the only two that are different from the rest): Before: ``` scala> spark.read.text("test.text").explain() == Physical Plan == *FileScan text [value#15] Batched: false, Format: org.apache.spark.sql.execution.datasources.text.TextFileFormat@xyz, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string> ``` After: ``` scala> spark.read.text("test.text").explain() == Physical Plan == *FileScan text [value#15] Batched: false, Format: Text, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string> ``` Also closes #14680. ## How was this patch tested? Verified in spark-shell. Author: Reynold Xin <rxin@databricks.com> Closes #16187 from rxin/SPARK-18760.
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala | 2
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala | 2
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala | 7
3 files changed, 7 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 031a0fe578..0965ffebea 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -61,7 +61,7 @@ class ParquetFileFormat
override def shortName(): String = "parquet"
- override def toString: String = "ParquetFormat"
+ override def toString: String = "Parquet"
override def hashCode(): Int = getClass.hashCode()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
index 178160cd71..897e535953 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
@@ -39,6 +39,8 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister {
override def shortName(): String = "text"
+ override def toString: String = "Text"
+
private def verifySchema(schema: StructType): Unit = {
if (schema.size != 1) {
throw new AnalysisException(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
index 7b6fe83b9a..267c462484 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
@@ -31,7 +31,8 @@ import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
-class FileStreamSourceTest extends StreamTest with SharedSQLContext with PrivateMethodTester {
+abstract class FileStreamSourceTest
+ extends StreamTest with SharedSQLContext with PrivateMethodTester {
import testImplicits._
@@ -848,13 +849,13 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
val explainWithoutExtended = q.explainInternal(false)
// `extended = false` only displays the physical plan.
assert("Relation.*text".r.findAllMatchIn(explainWithoutExtended).size === 0)
- assert("TextFileFormat".r.findAllMatchIn(explainWithoutExtended).size === 1)
+ assert(": Text".r.findAllMatchIn(explainWithoutExtended).size === 1)
val explainWithExtended = q.explainInternal(true)
// `extended = true` displays 3 logical plans (Parsed/Analyzed/Optimized) and 1 physical
// plan.
assert("Relation.*text".r.findAllMatchIn(explainWithExtended).size === 3)
- assert("TextFileFormat".r.findAllMatchIn(explainWithExtended).size === 1)
+ assert(": Text".r.findAllMatchIn(explainWithExtended).size === 1)
} finally {
q.stop()
}