aboutsummaryrefslogtreecommitdiff
path: root/sql/hive/src/test
diff options
context:
space:
mode:
author: Michael Armbrust <michael@databricks.com> 2014-03-24 22:15:51 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-03-24 22:15:51 -0700
commit: b637f2d91ab4d3d5bf13e8d959c919ebd776f6af (patch)
tree: 8c6555150402e804f00eca24e7c71eebc3426a23 /sql/hive/src/test
parent: 5140598df889f7227c9d6a7953031eeef524badd (diff)
download: spark-b637f2d91ab4d3d5bf13e8d959c919ebd776f6af.tar.gz
spark-b637f2d91ab4d3d5bf13e8d959c919ebd776f6af.tar.bz2
spark-b637f2d91ab4d3d5bf13e8d959c919ebd776f6af.zip
Unify the logic for column pruning, projection, and filtering of table scans.
This removes duplicated logic, dead code and casting when planning parquet table scans and hive table scans.

Other changes:
 - Fix tests now that we are doing a better job of column pruning (i.e., since pruning predicates are applied before we even start scanning tuples, columns required by these predicates do not need to be included in the output of the scan unless they are also included in the final output of this logical plan fragment).
 - Add rule to simplify trivial filters. This was required to avoid `WHERE false` from getting pushed into table scans, since `HiveTableScan` (reasonably) refuses to apply partition pruning predicates to non-partitioned tables.

Author: Michael Armbrust <michael@databricks.com>

Closes #213 from marmbrus/strategyCleanup and squashes the following commits:

48ce403 [Michael Armbrust] Move one more bit of parquet stuff into the core SQLContext.
834ce08 [Michael Armbrust] Address comments.
0f2c6f5 [Michael Armbrust] Unify the logic for column pruning, projection, and filtering of table scans for both Hive and Parquet relations. Fix tests now that we are doing a better job of column pruning.
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala 12
1 files changed, 6 insertions, 6 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
index bb65c91e2a..d2f8e5df5b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
@@ -33,7 +33,7 @@ class PruningSuite extends HiveComparisonTest {
createPruningTest("Column pruning: with partitioned table",
"SELECT key FROM srcpart WHERE ds = '2008-04-08' LIMIT 3",
Seq("key"),
- Seq("key", "ds"),
+ Seq("key"),
Seq(
Seq("2008-04-08", "11"),
Seq("2008-04-08", "12")))
@@ -97,7 +97,7 @@ class PruningSuite extends HiveComparisonTest {
createPruningTest("Partition pruning: with filter on string partition key",
"SELECT value, hr FROM srcpart1 WHERE ds = '2008-04-08'",
Seq("value", "hr"),
- Seq("value", "hr", "ds"),
+ Seq("value", "hr"),
Seq(
Seq("2008-04-08", "11"),
Seq("2008-04-08", "12")))
@@ -113,14 +113,14 @@ class PruningSuite extends HiveComparisonTest {
createPruningTest("Partition pruning: left only 1 partition",
"SELECT value, hr FROM srcpart1 WHERE ds = '2008-04-08' AND hr < 12",
Seq("value", "hr"),
- Seq("value", "hr", "ds"),
+ Seq("value", "hr"),
Seq(
Seq("2008-04-08", "11")))
createPruningTest("Partition pruning: all partitions pruned",
"SELECT value, hr FROM srcpart1 WHERE ds = '2014-01-27' AND hr = 11",
Seq("value", "hr"),
- Seq("value", "hr", "ds"),
+ Seq("value", "hr"),
Seq.empty)
createPruningTest("Partition pruning: pruning with both column key and partition key",
@@ -147,8 +147,8 @@ class PruningSuite extends HiveComparisonTest {
(columnNames, partValues)
}.head
- assert(actualOutputColumns sameElements expectedOutputColumns, "Output columns mismatch")
- assert(actualScannedColumns sameElements expectedScannedColumns, "Scanned columns mismatch")
+ assert(actualOutputColumns === expectedOutputColumns, "Output columns mismatch")
+ assert(actualScannedColumns === expectedScannedColumns, "Scanned columns mismatch")
assert(
actualPartValues.length === expectedPartValues.length,