author     hyukjinkwon <gurwls223@gmail.com>   2015-11-14 18:36:01 +0800
committer  Cheng Lian <lian@databricks.com>    2015-11-14 18:36:01 +0800
commit     139c15b624c88b376ffdd05d78795295c8c4fc17 (patch)
tree       8412f41d8547fc4af5181d63972693b71cd1efe4
parent     c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935 (diff)
[SPARK-11694][SQL] Parquet logical types are not being tested properly
All the physical types are properly tested at `ParquetIOSuite` but logical type mapping is not being tested.

Author: hyukjinkwon <gurwls223@gmail.com>
Author: Hyukjin Kwon <gurwls223@gmail.com>

Closes #9660 from HyukjinKwon/SPARK-11694.
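In essence, the added test writes a Parquet footer whose schema columns carry logical-type annotations, reads the row-less file back through the Parquet data source, and checks that the inferred Catalyst types match the expected mapping (e.g. INT_8 -> ByteType, INT_16 -> ShortType, DATE -> DateType, UTF8/ENUM -> StringType, DECIMAL(p,s) -> DecimalType(p,s)). A condensed sketch of that check, using a reduced schema for illustration and assuming the `withTempPath`, `sparkContext`, `sqlContext` and new `writeMetadata` helpers provided by the test traits in the diff below:

    import org.apache.hadoop.fs.Path
    import org.apache.parquet.schema.MessageTypeParser
    import org.apache.spark.sql.types._

    // A Parquet schema whose columns carry logical-type annotations.
    val parquetSchema = MessageTypeParser.parseMessageType(
      """message root {
        |  required int32  a(INT_8);
        |  required int32  b(DATE);
        |  required binary c(UTF8);
        |  required binary d(DECIMAL(32,0));
        |}
      """.stripMargin)
    // The Catalyst types those annotations are expected to map to, in column order.
    val expectedSparkTypes = Seq(ByteType, DateType, StringType, DecimalType(32, 0))

    withTempPath { location =>
      val path = new Path(location.getCanonicalPath)
      // Write only footer metadata (no rows), using the helper added to ParquetTest.
      writeMetadata(parquetSchema, path, sparkContext.hadoopConfiguration)
      // Reading the directory back infers the Catalyst schema from the logical types.
      val sparkTypes = sqlContext.read.parquet(path.toString).schema.map(_.dataType)
      assert(sparkTypes === expectedSparkTypes)
    }

Writing only the summary metadata keeps the check focused on the schema conversion itself rather than on the read path for actual row data.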
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala | 39
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala    | 17
2 files changed, 47 insertions, 9 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 82a42d68fe..78df363ade 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -91,6 +91,33 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
     }
   }
 
+  test("SPARK-11694 Parquet logical types are not being tested properly") {
+    val parquetSchema = MessageTypeParser.parseMessageType(
+      """message root {
+        | required int32 a(INT_8);
+        | required int32 b(INT_16);
+        | required int32 c(DATE);
+        | required int32 d(DECIMAL(1,0));
+        | required int64 e(DECIMAL(10,0));
+        | required binary f(UTF8);
+        | required binary g(ENUM);
+        | required binary h(DECIMAL(32,0));
+        | required fixed_len_byte_array(32) i(DECIMAL(32,0));
+        |}
+      """.stripMargin)
+
+    val expectedSparkTypes = Seq(ByteType, ShortType, DateType, DecimalType(1, 0),
+      DecimalType(10, 0), StringType, StringType, DecimalType(32, 0), DecimalType(32, 0))
+
+    withTempPath { location =>
+      val path = new Path(location.getCanonicalPath)
+      val conf = sparkContext.hadoopConfiguration
+      writeMetadata(parquetSchema, path, conf)
+      val sparkTypes = sqlContext.read.parquet(path.toString).schema.map(_.dataType)
+      assert(sparkTypes === expectedSparkTypes)
+    }
+  }
+
   test("string") {
     val data = (1 to 4).map(i => Tuple1(i.toString))
     // Property spark.sql.parquet.binaryAsString shouldn't affect Parquet files written by Spark SQL
@@ -374,16 +401,10 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
       """.stripMargin)
 
     withTempPath { location =>
-      val extraMetadata = Collections.singletonMap(
-        CatalystReadSupport.SPARK_METADATA_KEY, sparkSchema.toString)
-      val fileMetadata = new FileMetaData(parquetSchema, extraMetadata, "Spark")
+      val extraMetadata = Map(CatalystReadSupport.SPARK_METADATA_KEY -> sparkSchema.toString)
       val path = new Path(location.getCanonicalPath)
-
-      ParquetFileWriter.writeMetadataFile(
-        sparkContext.hadoopConfiguration,
-        path,
-        Collections.singletonList(
-          new Footer(path, new ParquetMetadata(fileMetadata, Collections.emptyList()))))
+      val conf = sparkContext.hadoopConfiguration
+      writeMetadata(parquetSchema, path, conf, extraMetadata)
 
       assertResult(sqlContext.read.parquet(path.toString).schema) {
         StructType(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala
index 8ffb01fc5b..fdd7697c91 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.parquet
 
 import java.io.File
 
+import org.apache.parquet.schema.MessageType
+
 import scala.collection.JavaConverters._
 import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag
@@ -117,6 +119,21 @@ private[sql] trait ParquetTest extends SQLTestUtils {
     ParquetFileWriter.writeMetadataFile(configuration, path, Seq(footer).asJava)
   }
 
+  /**
+   * This is an overloaded version of `writeMetadata` above to allow writing customized
+   * Parquet schema.
+   */
+  protected def writeMetadata(
+      parquetSchema: MessageType, path: Path, configuration: Configuration,
+      extraMetadata: Map[String, String] = Map.empty[String, String]): Unit = {
+    val extraMetadataAsJava = extraMetadata.asJava
+    val createdBy = s"Apache Spark ${org.apache.spark.SPARK_VERSION}"
+    val fileMetadata = new FileMetaData(parquetSchema, extraMetadataAsJava, createdBy)
+    val parquetMetadata = new ParquetMetadata(fileMetadata, Seq.empty[BlockMetaData].asJava)
+    val footer = new Footer(path, parquetMetadata)
+    ParquetFileWriter.writeMetadataFile(configuration, path, Seq(footer).asJava)
+  }
+
   protected def readAllFootersWithoutSummaryFiles(
       path: Path, configuration: Configuration): Seq[Footer] = {
     val fs = path.getFileSystem(configuration)