[SQL] Makes conjunction pushdown more aggressive for in-memory table

This is inspired by the [Parquet record filter generation code](https://github.com/apache/spark/blob/64c6b9bad559c21f25cd9fbe37c8813cdab939f2/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala#L387-L400).  [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/3318)  Author: Cheng Lian <lian@databricks.com> Closes #3318 from liancheng/aggresive-conj-pushdown and squashes the following commits: 78b69d2 [Cheng Lian] Makes conjunction pushdown more aggressive
author: Cheng Lian <lian@databricks.com> 2014-11-17 15:33:13 -0800
committer: Michael Armbrust <michael@databricks.com> 2014-11-17 15:33:13 -0800
commit: 5ce7dae859dc273b0fc532c9456b5960b1eca399 (patch)
tree: 5ce02e2826733a84e6cfd659886cfc023deefed6 /sql
parent: 0f3ceb56c78e7260725a09fba0e10aa193cbda4b (diff)
download: spark-5ce7dae859dc273b0fc532c9456b5960b1eca399.tar.gz
spark-5ce7dae859dc273b0fc532c9456b5960b1eca399.tar.bz2
spark-5ce7dae859dc273b0fc532c9456b5960b1eca399.zip
2 files changed, 11 insertions, 5 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala
index 455b415d9d..881d32b105 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala
@@ -182,8 +182,8 @@ private[sql] case class InMemoryColumnarTableScan(
   // to evaluate to `true' based on statistics collected about this partition batch.
   val buildFilter: PartialFunction[Expression, Expression] = {
     case And(lhs: Expression, rhs: Expression)
-      if buildFilter.isDefinedAt(lhs) && buildFilter.isDefinedAt(rhs) =>
-      buildFilter(lhs) && buildFilter(rhs)
+      if buildFilter.isDefinedAt(lhs) || buildFilter.isDefinedAt(rhs) =>
+      (buildFilter.lift(lhs) ++ buildFilter.lift(rhs)).reduce(_ && _)
 
     case Or(lhs: Expression, rhs: Expression)
       if buildFilter.isDefinedAt(lhs) && buildFilter.isDefinedAt(rhs) =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala
index 9ba3c21017..82afa31a99 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala
@@ -78,17 +78,23 @@ class PartitionBatchPruningSuite extends FunSuite with BeforeAndAfterAll with Be
   // Conjunction and disjunction
   checkBatchPruning("SELECT key FROM pruningData WHERE key > 8 AND key <= 21", 2, 3)(9 to 21)
   checkBatchPruning("SELECT key FROM pruningData WHERE key < 2 OR key > 99", 2, 2)(Seq(1, 100))
+  checkBatchPruning("SELECT key FROM pruningData WHERE key < 12 AND key IS NOT NULL", 1, 2)(1 to 11)
   checkBatchPruning("SELECT key FROM pruningData WHERE key < 2 OR (key > 78 AND key < 92)", 3, 4) {
     Seq(1) ++ (79 to 91)
   }
+  checkBatchPruning("SELECT key FROM pruningData WHERE NOT (key < 88)", 1, 2) {
+    // Although the `NOT` operator isn't supported directly, the optimizer can transform
+    // `NOT (a < b)` to `b >= a`
+    88 to 100
+  }
 
   // With unsupported predicate
-  checkBatchPruning("SELECT key FROM pruningData WHERE NOT (key < 88)", 1, 2)(88 to 100)
-  checkBatchPruning("SELECT key FROM pruningData WHERE key < 12 AND key IS NOT NULL", 1, 2)(1 to 11)
-
   {
     val seq = (1 to 30).mkString(", ")
     checkBatchPruning(s"SELECT key FROM pruningData WHERE NOT (key IN ($seq))", 5, 10)(31 to 100)
+    checkBatchPruning(s"SELECT key FROM pruningData WHERE NOT (key IN ($seq)) AND key > 88", 1, 2) {
+      89 to 100
+    }
   }
 
   def checkBatchPruning(
author	Cheng Lian <lian@databricks.com>	2014-11-17 15:33:13 -0800
committer	Michael Armbrust <michael@databricks.com>	2014-11-17 15:33:13 -0800
commit	5ce7dae859dc273b0fc532c9456b5960b1eca399 (patch)
tree	5ce02e2826733a84e6cfd659886cfc023deefed6 /sql
parent	0f3ceb56c78e7260725a09fba0e10aa193cbda4b (diff)
download	spark-5ce7dae859dc273b0fc532c9456b5960b1eca399.tar.gz spark-5ce7dae859dc273b0fc532c9456b5960b1eca399.tar.bz2 spark-5ce7dae859dc273b0fc532c9456b5960b1eca399.zip