author    gatorsmile <gatorsmile@gmail.com>  2016-06-15 14:08:55 -0700
committer Cheng Lian <lian@databricks.com>   2016-06-15 14:08:55 -0700
commit    09925735b5e53db61ed12abae58864670a3a5f98 (patch)
tree      f75f78a1585ab10afd1fa1245a0d09462c501e82
parent    4df8df5c2e68f5a5d231c401b04d762d7a648159 (diff)
[SPARK-15901][SQL][TEST] Verification of CONVERT_METASTORE_ORC and CONVERT_METASTORE_PARQUET
#### What changes were proposed in this pull request?

So far, we do not have test cases verifying whether the external parameters `HiveUtils.CONVERT_METASTORE_ORC` and `HiveUtils.CONVERT_METASTORE_PARQUET` work properly when users set non-default values. This PR adds such test cases to avoid potential regressions.

#### How was this patch tested?

N/A

Author: gatorsmile <gatorsmile@gmail.com>

Closes #13622 from gatorsmile/addTestCase4parquetOrcConversion.
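For context, a minimal sketch of the pattern the new tests follow: toggle the conversion flag, run a query against a Hive ORC table, and check whether the analyzed plan reads through the data source path (`LogicalRelation`) or keeps the Hive `MetastoreRelation`. The helper below is illustrative only; it assumes a Spark 2.0-era session where `HiveUtils.CONVERT_METASTORE_ORC` and `MetastoreRelation` exist (as in the diff) and that a `dummy_orc` Hive table like the one created in the test already exists.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}

// Sketch: flip the conversion flag and check which relation the analyzed plan uses.
def assertOrcConversion(spark: SparkSession, expectConverted: Boolean): Unit = {
  spark.conf.set(HiveUtils.CONVERT_METASTORE_ORC.key, expectConverted.toString)
  val plan = spark.sql("SELECT * FROM dummy_orc WHERE key = 0").queryExecution.analyzed
  val usesDataSource = plan.collectFirst { case _: LogicalRelation => () }.isDefined
  val usesMetastore  = plan.collectFirst { case _: MetastoreRelation => () }.isDefined
  // Conversion on  -> the ORC table should be read as a data source (LogicalRelation).
  // Conversion off -> the plan should keep the Hive MetastoreRelation.
  assert(if (expectConverted) usesDataSource else usesMetastore,
    s"Unexpected plan for CONVERT_METASTORE_ORC=$expectConverted:\n$plan")
}
```

The committed tests in the diff below wrap this same check in `withSQLConf`/`withTable` so the configuration and table are restored after each run, and they repeat it for both `"true"` and `"false"`.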
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala | 75
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala     | 40
2 files changed, 83 insertions(+), 32 deletions(-)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index e6c9c5d4d9..cd41da7214 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.hive.orc
-import java.io.File
import java.nio.charset.StandardCharsets
import org.scalatest.BeforeAndAfterAll
@@ -25,7 +24,7 @@ import org.scalatest.BeforeAndAfterAll
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.LogicalRelation
-import org.apache.spark.sql.hive.HiveUtils
+import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
import org.apache.spark.sql.internal.SQLConf
@@ -401,36 +400,48 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
}
}
- test("SPARK-14070 Use ORC data source for SQL queries on ORC tables") {
- withTempPath { dir =>
- withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true",
- HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
- val path = dir.getCanonicalPath
-
- withTable("dummy_orc") {
- withTempTable("single") {
- spark.sql(
- s"""CREATE TABLE dummy_orc(key INT, value STRING)
- |STORED AS ORC
- |LOCATION '$path'
- """.stripMargin)
-
- val singleRowDF = Seq((0, "foo")).toDF("key", "value").coalesce(1)
- singleRowDF.createOrReplaceTempView("single")
-
- spark.sql(
- s"""INSERT INTO TABLE dummy_orc
- |SELECT key, value FROM single
- """.stripMargin)
-
- val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0")
- checkAnswer(df, singleRowDF)
-
- val queryExecution = df.queryExecution
- queryExecution.analyzed.collectFirst {
- case _: LogicalRelation => ()
- }.getOrElse {
- fail(s"Expecting the query plan to have LogicalRelation, but got:\n$queryExecution")
+ test("Verify the ORC conversion parameter: CONVERT_METASTORE_ORC") {
+ withTempTable("single") {
+ val singleRowDF = Seq((0, "foo")).toDF("key", "value")
+ singleRowDF.createOrReplaceTempView("single")
+
+ Seq("true", "false").foreach { orcConversion =>
+ withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> orcConversion) {
+ withTable("dummy_orc") {
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
+ spark.sql(
+ s"""
+ |CREATE TABLE dummy_orc(key INT, value STRING)
+ |STORED AS ORC
+ |LOCATION '$path'
+ """.stripMargin)
+
+ spark.sql(
+ s"""
+ |INSERT INTO TABLE dummy_orc
+ |SELECT key, value FROM single
+ """.stripMargin)
+
+ val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0")
+ checkAnswer(df, singleRowDF)
+
+ val queryExecution = df.queryExecution
+ if (orcConversion == "true") {
+ queryExecution.analyzed.collectFirst {
+ case _: LogicalRelation => ()
+ }.getOrElse {
+ fail(s"Expecting the query plan to convert orc to data sources, " +
+ s"but got:\n$queryExecution")
+ }
+ } else {
+ queryExecution.analyzed.collectFirst {
+ case _: MetastoreRelation => ()
+ }.getOrElse {
+ fail(s"Expecting no conversion from orc to data sources, " +
+ s"but got:\n$queryExecution")
+ }
+ }
}
}
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index 06b74da196..6af9976ea0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -676,6 +676,46 @@ class ParquetSourceSuite extends ParquetPartitioningTest {
}
}
+ test("Verify the PARQUET conversion parameter: CONVERT_METASTORE_PARQUET") {
+ withTempTable("single") {
+ val singleRowDF = Seq((0, "foo")).toDF("key", "value")
+ singleRowDF.createOrReplaceTempView("single")
+
+ Seq("true", "false").foreach { parquetConversion =>
+ withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
+ val tableName = "test_parquet_ctas"
+ withTable(tableName) {
+ sql(
+ s"""
+ |CREATE TABLE $tableName STORED AS PARQUET
+ |AS SELECT tmp.key, tmp.value FROM single tmp
+ """.stripMargin)
+
+ val df = spark.sql(s"SELECT * FROM $tableName WHERE key=0")
+ checkAnswer(df, singleRowDF)
+
+ val queryExecution = df.queryExecution
+ if (parquetConversion == "true") {
+ queryExecution.analyzed.collectFirst {
+ case _: LogicalRelation =>
+ }.getOrElse {
+ fail(s"Expecting the query plan to convert parquet to data sources, " +
+ s"but got:\n$queryExecution")
+ }
+ } else {
+ queryExecution.analyzed.collectFirst {
+ case _: MetastoreRelation =>
+ }.getOrElse {
+ fail(s"Expecting no conversion from parquet to data sources, " +
+ s"but got:\n$queryExecution")
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
test("values in arrays and maps stored in parquet are always nullable") {
val df = createDataFrame(Tuple2(Map(2 -> 3), Seq(4, 5, 6)) :: Nil).toDF("m", "a")
val mapType1 = MapType(IntegerType, IntegerType, valueContainsNull = false)