author    hyukjinkwon <gurwls223@gmail.com>    2016-05-13 09:04:37 -0700
committer Reynold Xin <rxin@databricks.com>    2016-05-13 09:04:37 -0700
commit    3ded5bc4db2badc9ff49554e73421021d854306b (patch)
tree      c8e8bbb95806cddb899c971c333a6088e443fbe9 /sql/hive/src/test
parent    10a838967455db80d750ef84a1c6b3088b19fd9f (diff)
[SPARK-15267][SQL] Refactor options for JDBC and ORC data sources and change default compression for ORC
## What changes were proposed in this pull request?
Currently, the Parquet, JSON, and CSV data sources each have a class that holds their options (`ParquetOptions`, `JSONOptions`, and `CSVOptions`). Gathering a source's options into a single class makes them convenient to manage. The `JDBC`, `Text`, `libsvm`, and `ORC` data sources do not have such a class yet; putting their options in the same unified format would make options easier to add and maintain.
This PR refactors the options in Spark's internal data sources by adding new classes: `OrcOptions`, `TextOptions`, `JDBCOptions`, and `LibSVMOptions`.
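To make the pattern concrete, here is a minimal sketch of the shape such an options class takes, modeled on the existing `ParquetOptions`/`CSVOptions`; the field name, codec table, and error message are illustrative assumptions, not the PR's exact code:

```scala
// Sketch only: a per-source options class in the style this PR introduces.
class OrcOptions(parameters: Map[String, String]) extends Serializable {

  // Resolve the user-facing `compression` option to an ORC codec name,
  // defaulting to Snappy as this PR proposes.
  val compressionCodec: String = {
    val shortNames = Map(
      "none" -> "NONE",
      "uncompressed" -> "NONE",
      "snappy" -> "SNAPPY",
      "zlib" -> "ZLIB",
      "lzo" -> "LZO")
    val name = parameters.getOrElse("compression", "snappy").toLowerCase
    shortNames.getOrElse(name,
      throw new IllegalArgumentException(s"Codec [$name] is not available."))
  }
}
```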
Also, this PR changes the default compression codec for ORC from `NONE` to `SNAPPY`.
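In user-facing terms (an illustrative sketch; the paths are placeholders and `spark` is an existing `SparkSession`), the effect is:

```scala
// A plain ORC write now produces Snappy-compressed files by default,
// while the `compression` option still overrides the default per write.
spark.range(0, 10).write.orc("/tmp/orc-default") // SNAPPY (was NONE before)
spark.range(0, 10).write
  .option("compression", "ZLIB")                 // explicit codec wins
  .orc("/tmp/orc-zlib")
```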
## How was this patch tested?
Existing tests should cover the refactoring, and a new unit test in `OrcHadoopFsRelationSuite` covers the change of the default compression codec for ORC.
Author: hyukjinkwon <gurwls223@gmail.com>
Closes #13048 from HyukjinKwon/SPARK-15267.
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala | 18
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala            |  8
2 files changed, 18 insertions, 8 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
index 965680ff0d..0207b4e8c9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
@@ -20,7 +20,6 @@ package org.apache.spark.sql.hive.orc
 import java.io.File
 
 import org.apache.hadoop.fs.{FileSystem, Path}
-import org.apache.hadoop.hive.ql.io.orc.{CompressionKind, OrcFile}
 
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.sql.Row
@@ -98,9 +97,10 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
     val fs = FileSystem.getLocal(conf)
     val maybeOrcFile = new File(path).listFiles().find(_.getName.endsWith(".zlib.orc"))
     assert(maybeOrcFile.isDefined)
-    val orcFilePath = new Path(maybeOrcFile.get.toPath.toString)
-    val orcReader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))
-    assert(orcReader.getCompression == CompressionKind.ZLIB)
+    val orcFilePath = maybeOrcFile.get.toPath.toString
+    val expectedCompressionKind =
+      OrcFileOperator.getFileReader(orcFilePath).get.getCompression
+    assert("ZLIB" === expectedCompressionKind.name())
 
     val copyDf = spark
       .read
@@ -108,4 +108,14 @@
       checkAnswer(df, copyDf)
     }
   }
+
+  test("Default compression codec is snappy for ORC compression") {
+    withTempPath { file =>
+      spark.range(0, 10).write
+        .orc(file.getCanonicalPath)
+      val expectedCompressionKind =
+        OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
+      assert("SNAPPY" === expectedCompressionKind.name())
+    }
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index 084546f99d..9a0885822b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -171,7 +171,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
   test("Compression options for writing to an ORC file (SNAPPY, ZLIB and NONE)") {
     withTempPath { file =>
       spark.range(0, 10).write
-        .option("orc.compress", "ZLIB")
+        .option("compression", "ZLIB")
         .orc(file.getCanonicalPath)
       val expectedCompressionKind =
         OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
@@ -180,7 +180,7 @@
 
     withTempPath { file =>
       spark.range(0, 10).write
-        .option("orc.compress", "SNAPPY")
+        .option("compression", "SNAPPY")
         .orc(file.getCanonicalPath)
       val expectedCompressionKind =
         OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
@@ -189,7 +189,7 @@
 
     withTempPath { file =>
       spark.range(0, 10).write
-        .option("orc.compress", "NONE")
+        .option("compression", "NONE")
         .orc(file.getCanonicalPath)
       val expectedCompressionKind =
         OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
@@ -201,7 +201,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
   ignore("LZO compression options for writing to an ORC file not supported in Hive 1.2.1") {
     withTempPath { file =>
       spark.range(0, 10).write
-        .option("orc.compress", "LZO")
+        .option("compression", "LZO")
         .orc(file.getCanonicalPath)
       val expectedCompressionKind =
         OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression