author    Sameer Agarwal <sameer@databricks.com>  2016-03-07 12:04:59 -0800
committer Yin Huai <yhuai@databricks.com>  2016-03-07 12:04:59 -0800
commit    ef77003178eb5cdcb4fe519fc540917656c5d577 (patch)
tree      e98a1feca6b9a8b80a767d938a4bf31e9c61d9af /sql/core/src
parent    489641117651d11806d2773b7ded7c163d0260e5 (diff)
[SPARK-13495][SQL] Add Null Filters in the query plan for Filters/Joins based on their data constraints
## What changes were proposed in this pull request?

This PR adds an optimizer rule to eliminate reading (unnecessary) NULL values if they are not required for correctness, by inserting `isNotNull` filters in the query plan. These filters are currently inserted beneath existing `Filter` and `Join` operators and are inferred based on their data constraints.

Note: While this optimization is applicable to all types of joins, it primarily benefits `Inner` and `LeftSemi` joins.

## How was this patch tested?

1. Added a new `NullFilteringSuite` that tests for `IsNotNull` filters in the query plan for joins and filters. Also tests the interaction with the `CombineFilters` optimizer rule.
2. Tested the generated expression trees via `OrcFilterSuite`.
3. Tested the filter source pushdown logic via `SimpleTextHadoopFsRelationSuite`.

cc yhuai nongli

Author: Sameer Agarwal <sameer@databricks.com>

Closes #11372 from sameeragarwal/gen-isnotnull.
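The inserted null checks follow from constraint propagation: a null-intolerant predicate such as `key = 15` can never hold when `key` is null, so an `IsNotNull(key)` filter can be prepended without changing results, letting scans skip null values early. Below is a minimal, self-contained sketch of that inference in plain Scala; the toy expression tree (`Attr`, `Lit`, `EqualTo`, ...) and the `NullFiltering` object are illustrative stand-ins, not Catalyst's actual classes or this PR's rule.

```scala
// Toy expression tree standing in for Catalyst expressions (illustrative only).
sealed trait Expr
case class Attr(name: String) extends Expr
case class Lit(value: Any) extends Expr
case class EqualTo(left: Expr, right: Expr) extends Expr
case class IsNotNull(child: Expr) extends Expr
case class And(left: Expr, right: Expr) extends Expr

object NullFiltering {
  // Attributes that must be non-null for the predicate to evaluate to true:
  // a null-intolerant comparison like `a = 15` is never true when `a` is null.
  def inferIsNotNull(pred: Expr): Set[Attr] = pred match {
    case EqualTo(l, r) => Set(l, r).collect { case a: Attr => a }
    case And(l, r)     => inferIsNotNull(l) ++ inferIsNotNull(r)
    case _             => Set.empty
  }

  // Prepend the inferred null checks, e.g. `key = 15` becomes
  // `IsNotNull(key) AND key = 15`.
  def addNullFilters(pred: Expr): Expr =
    inferIsNotNull(pred).foldRight(pred)((a, p) => And(IsNotNull(a), p))
}

object Demo extends App {
  println(NullFiltering.addNullFilters(EqualTo(Attr("key"), Lit(15))))
  // And(IsNotNull(Attr(key)),EqualTo(Attr(key),Lit(15)))
}
```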
Diffstat (limited to 'sql/core/src')
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala | 2 +-
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala | 6 ++----
2 files changed, 3 insertions, 5 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
index f66e08e6ca..a733237a5e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
@@ -159,7 +159,7 @@ class PlannerSuite extends SharedSQLContext {
     withTempTable("testPushed") {
       val exp = sql("select * from testPushed where key = 15").queryExecution.sparkPlan
-      assert(exp.toString.contains("PushedFilters: [EqualTo(key,15)]"))
+      assert(exp.toString.contains("PushedFilters: [IsNotNull(key), EqualTo(key,15)]"))
     }
   }
 }
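For context, the `PushedFilters` string asserted above is rendered from Spark's public data source filter API (`org.apache.spark.sql.sources`). A rough sketch of the two filters a source now sees for `WHERE key = 15`, assuming Spark is on the classpath; the `println` here only approximates how the explain output is rendered:

```scala
import org.apache.spark.sql.sources.{EqualTo, Filter, IsNotNull}

// With null filtering, the planner pushes a null check alongside the
// equality predicate, so the source can skip null values during the scan.
val pushed: Seq[Filter] = Seq(IsNotNull("key"), EqualTo("key", 15))
println(pushed.mkString("PushedFilters: [", ", ", "]"))
// PushedFilters: [IsNotNull(key), EqualTo(key,15)]
```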
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index bd51154c58..d2947676a0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -74,10 +74,8 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContext
       selectedFilters.foreach { pred =>
         val maybeFilter = ParquetFilters.createFilter(df.schema, pred)
         assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $pred")
-        maybeFilter.foreach { f =>
-          // Doesn't bother checking type parameters here (e.g. `Eq[Integer]`)
-          assert(f.getClass === filterClass)
-        }
+        // Doesn't bother checking type parameters here (e.g. `Eq[Integer]`)
+        assert(maybeFilter.exists(_.getClass === filterClass))
       }
       checker(stripSparkFilter(query), expected)
     }