aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-06-16 14:23:17 -0700
committerAndrew Or <andrew@databricks.com>2016-06-16 14:23:17 -0700
commit796429d7117e2544207bd9d67bda8b603cb1a535 (patch)
tree608c867dd6e68a04f7a922f07ebace8a5a83fdfd /sql
parent7a89f2adbbc82a23f06638806ffc8596a7efe7f3 (diff)
downloadspark-796429d7117e2544207bd9d67bda8b603cb1a535.tar.gz
spark-796429d7117e2544207bd9d67bda8b603cb1a535.tar.bz2
spark-796429d7117e2544207bd9d67bda8b603cb1a535.zip
[SPARK-15998][SQL] Verification of SQLConf HIVE_METASTORE_PARTITION_PRUNING
#### What changes were proposed in this pull request? `HIVE_METASTORE_PARTITION_PRUNING` is a public `SQLConf`. When `true`, some predicates will be pushed down into the Hive metastore so that non-matching partitions can be eliminated earlier. The current default value is `false`. For performance improvement, users might turn this parameter on. So far, the code base has no test case that verifies whether this `SQLConf` works properly. This PR improves the test coverage to guard against future regressions. #### How was this patch tested? N/A Author: gatorsmile <gatorsmile@gmail.com> Closes #13716 from gatorsmile/addTestMetastorePartitionPruning.
Diffstat (limited to 'sql')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala60
1 files changed, 57 insertions, 3 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 60f8be5e0e..76d3f3dbab 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,13 +18,14 @@
package org.apache.spark.sql.hive.execution
import org.apache.spark.sql.Row
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.util.Utils
-class HiveTableScanSuite extends HiveComparisonTest {
+class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestHiveSingleton {
createQueryTest("partition_based_table_scan_with_different_serde",
"""
@@ -89,4 +90,57 @@ class HiveTableScanSuite extends HiveComparisonTest {
assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi"))
assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi"))
}
+
+ private def checkNumScannedPartitions(stmt: String, expectedNumParts: Int): Unit = { // test helper: asserts how many partitions the Hive scan planned for `stmt` fetches
+ val plan = sql(stmt).queryExecution.sparkPlan // the `sparkPlan` Spark builds for `stmt`
+ val numPartitions = plan.collectFirst { // only the first HiveTableScanExec node found in the plan is inspected
+ case p: HiveTableScanExec =>
+ p.relation.getHiveQlPartitions(p.partitionPruningPred).length // partitions returned when given this scan's pruning predicates
+ }.getOrElse(0) // a plan without any HiveTableScanExec is counted as 0 partitions
+ assert(numPartitions == expectedNumParts)
+ }
+
+ test("Verify SQLConf HIVE_METASTORE_PARTITION_PRUNING") { // runs the same partition-count checks with the conf set to "true" and to "false"
+ val view = "src"
+ withTempTable(view) { // NOTE(review): presumably cleans up the temp view once the body finishes; confirm in SQLTestUtils
+ spark.range(1, 5).createOrReplaceTempView(view) // source rows for the INSERT below come from this temp view
+ val table = "table_with_partition"
+ withTable(table) { // NOTE(review): presumably drops the table once the body finishes; confirm in SQLTestUtils
+ sql( // create a table with one data column (id) and five string partition columns (p1..p5)
+ s"""
+ |CREATE TABLE $table(id string)
+ |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string)
+ """.stripMargin)
+ sql( // one multi-insert statement loading two partitions that differ only in p2 ('b' vs 'c')
+ s"""
+ |FROM $view v
+ |INSERT INTO TABLE $table
+ |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
+ |SELECT v.id
+ |INSERT INTO TABLE $table
+ |PARTITION (p1='a',p2='c',p3='c',p4='d',p5='e')
+ |SELECT v.id
+ """.stripMargin)
+
+ Seq("true", "false").foreach { hivePruning =>
+ withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> hivePruning) {
+ // If the pruning predicate is used, getHiveQlPartitions should only return the
+ // qualified partition; otherwise, it returns all the partitions.
+ val expectedNumPartitions = if (hivePruning == "true") 1 else 2
+ checkNumScannedPartitions(
+ stmt = s"SELECT id, p2 FROM $table WHERE p2 <= 'b'", expectedNumPartitions) // of the two partitions, only p2='b' satisfies p2 <= 'b'
+ }
+ }
+
+ Seq("true", "false").foreach { hivePruning =>
+ withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> hivePruning) {
+ // If the pruning predicate does not exist, getHiveQlPartitions should always
+ // return all the partitions.
+ checkNumScannedPartitions(
+ stmt = s"SELECT id, p2 FROM $table WHERE id <= 3", expectedNumParts = 2) // the filter is on the data column id, not on a partition column
+ }
+ }
+ }
+ }
+ }
}