author    hyukjinkwon <gurwls223@gmail.com>    2016-05-13 09:04:37 -0700
committer Reynold Xin <rxin@databricks.com>    2016-05-13 09:04:37 -0700
commit    3ded5bc4db2badc9ff49554e73421021d854306b (patch)
tree      c8e8bbb95806cddb899c971c333a6088e443fbe9 /sql/hive/src/test
parent    10a838967455db80d750ef84a1c6b3088b19fd9f (diff)
[SPARK-15267][SQL] Refactor options for JDBC and ORC data sources and change default compression for ORC
## What changes were proposed in this pull request?

Currently, the Parquet, JSON and CSV data sources each have a class for their options (`ParquetOptions`, `JSONOptions` and `CSVOptions`). Gathering a source's options into such a class makes them convenient to manage. The `JDBC`, `Text`, `libsvm` and `ORC` data sources do not yet have one; putting their options into the same unified format makes it easier to add and maintain options consistently.

This PR refactors the options in Spark's internal data sources by adding new classes: `OrcOptions`, `TextOptions`, `JDBCOptions` and `LibSVMOptions`.

This PR also changes the default compression codec for ORC from `NONE` to `SNAPPY`.

## How was this patch tested?

Existing tests should cover the refactoring; unit tests in `OrcHadoopFsRelationSuite` cover the change of the default compression codec for ORC.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #13048 from HyukjinKwon/SPARK-15267.
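For context, here is a minimal sketch of the options-class pattern the PR describes. The class shape, the `"compression"` key handling, and the codec map below are illustrative assumptions based on the description above, not the exact `OrcOptions` source:

```scala
// Sketch only: gathers ORC writer options into one class, as the PR proposes.
class OrcOptions(parameters: Map[String, String]) extends Serializable {
  // Supported user-facing codec names mapped to ORC's codec identifiers
  // (an assumed set; the real list may differ).
  private val shortOrcCompressionCodecNames = Map(
    "none" -> "NONE", "snappy" -> "SNAPPY", "zlib" -> "ZLIB", "lzo" -> "LZO")

  // Resolve the unified "compression" option, defaulting to SNAPPY when
  // unset -- the new default this PR introduces.
  val compressionCodec: String = {
    val codecName = parameters.getOrElse("compression", "snappy").toLowerCase
    shortOrcCompressionCodecNames.getOrElse(codecName,
      throw new IllegalArgumentException(s"Codec [$codecName] is not supported."))
  }
}
```

For example, `new OrcOptions(Map("compression" -> "zlib")).compressionCodec` would yield `"ZLIB"`, and `new OrcOptions(Map.empty).compressionCodec` would yield `"SNAPPY"`.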
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala | 18
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala            |  8
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
index 965680ff0d..0207b4e8c9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
@@ -20,7 +20,6 @@ package org.apache.spark.sql.hive.orc
import java.io.File

import org.apache.hadoop.fs.{FileSystem, Path}
-import org.apache.hadoop.hive.ql.io.orc.{CompressionKind, OrcFile}

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
@@ -98,9 +97,10 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
val fs = FileSystem.getLocal(conf)
val maybeOrcFile = new File(path).listFiles().find(_.getName.endsWith(".zlib.orc"))
assert(maybeOrcFile.isDefined)
- val orcFilePath = new Path(maybeOrcFile.get.toPath.toString)
- val orcReader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))
- assert(orcReader.getCompression == CompressionKind.ZLIB)
+ val orcFilePath = maybeOrcFile.get.toPath.toString
+ val expectedCompressionKind =
+ OrcFileOperator.getFileReader(orcFilePath).get.getCompression
+ assert("ZLIB" === expectedCompressionKind.name())
val copyDf = spark
.read
@@ -108,4 +108,14 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
checkAnswer(df, copyDf)
}
}
+
+ test("Default compression codec is snappy for ORC compression") {
+ withTempPath { file =>
+ spark.range(0, 10).write
+ .orc(file.getCanonicalPath)
+ val expectedCompressionKind =
+ OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
+ assert("SNAPPY" === expectedCompressionKind.name())
+ }
+ }
}
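The user-facing effect of the default change tested above, sketched with hypothetical output paths: an ORC write with no compression option now produces SNAPPY-compressed files, while an explicit option still overrides the default.

```scala
// Default is now SNAPPY (was NONE before this change).
spark.range(0, 10).write.orc("/tmp/orc-snappy-by-default")

// Opting back out of compression explicitly.
spark.range(0, 10).write
  .option("compression", "none")
  .orc("/tmp/orc-uncompressed")
```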
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index 084546f99d..9a0885822b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -171,7 +171,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
test("Compression options for writing to an ORC file (SNAPPY, ZLIB and NONE)") {
withTempPath { file =>
spark.range(0, 10).write
- .option("orc.compress", "ZLIB")
+ .option("compression", "ZLIB")
.orc(file.getCanonicalPath)
val expectedCompressionKind =
OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
@@ -180,7 +180,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
withTempPath { file =>
spark.range(0, 10).write
- .option("orc.compress", "SNAPPY")
+ .option("compression", "SNAPPY")
.orc(file.getCanonicalPath)
val expectedCompressionKind =
OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
@@ -189,7 +189,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
withTempPath { file =>
spark.range(0, 10).write
- .option("orc.compress", "NONE")
+ .option("compression", "NONE")
.orc(file.getCanonicalPath)
val expectedCompressionKind =
OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
@@ -201,7 +201,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
ignore("LZO compression options for writing to an ORC file not supported in Hive 1.2.1") {
withTempPath { file =>
spark.range(0, 10).write
- .option("orc.compress", "LZO")
+ .option("compression", "LZO")
.orc(file.getCanonicalPath)
val expectedCompressionKind =
OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
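For reference, the option rename these tests exercise, shown as a usage sketch (`df` and the output path are placeholders): the unified `compression` key matches what the Parquet, JSON and CSV sources already accept, replacing the Hive-specific `orc.compress` key these tests used previously.

```scala
// New unified key, consistent across Spark's built-in data sources.
df.write.option("compression", "zlib").orc("/tmp/orc-zlib")
```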