author    Sameer Agarwal <sameer@databricks.com>  2016-03-08 15:40:45 -0800
committer Yin Huai <yhuai@databricks.com>  2016-03-08 15:40:45 -0800
commit    e430614eae53c8864b31a1dc64db83e27100d1d9 (patch)
tree      8ca19928c742c82495aa473d7f2d28cfa9b372a6 /sql/catalyst
parent    1e28840594b9d972c96d3922ca0bf0f76e313e82 (diff)
download  spark-e430614eae53c8864b31a1dc64db83e27100d1d9.tar.gz
          spark-e430614eae53c8864b31a1dc64db83e27100d1d9.tar.bz2
          spark-e430614eae53c8864b31a1dc64db83e27100d1d9.zip
[SPARK-13668][SQL] Reorder filter/join predicates to short-circuit isNotNull checks
## What changes were proposed in this pull request?

If a filter predicate or a join condition contains `IsNotNull` checks, we should reorder the conjuncts so that these non-nullability checks are evaluated before the rest of the predicates. For example, a filter predicate of the form `a > 5 && isNotNull(b)` should be rewritten as `isNotNull(b) && a > 5` during physical plan generation.

## How was this patch tested?

New unit tests in `ReorderedPredicateSuite` verify the physical plan for both filters and joins.

Author: Sameer Agarwal <sameer@databricks.com>

Closes #11511 from sameeragarwal/reorder-isnotnull.
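A minimal standalone sketch of the reordering idea described above (not part of the patch itself), assuming only `spark-catalyst` is on the classpath; the object name `ReorderDemo`, the attribute names, and the sample predicate are illustrative:

```scala
// Illustrative sketch only: reorders conjuncts so IsNotNull checks come first,
// mirroring the sortWith trick in the patch below.
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.IntegerType

object ReorderDemo extends PredicateHelper {
  def reorder(expr: Expression): Expression =
    splitConjunctivePredicates(expr)                  // a > 5, isNotNull(b)
      .sortWith((x, _) => x.isInstanceOf[IsNotNull])  // isNotNull(b), a > 5
      .reduce(And)                                    // isNotNull(b) && a > 5

  def main(args: Array[String]): Unit = {
    val a = AttributeReference("a", IntegerType)()
    val b = AttributeReference("b", IntegerType)()
    val predicate = And(GreaterThan(a, Literal(5)), IsNotNull(b))
    // Prints something like: (isnotnull(b#1) && (a#0 > 5))
    println(reorder(predicate))
  }
}
```

Note that the rewrite only changes the order of the conjuncts, so the predicate is semantically equivalent; the benefit comes from short-circuiting the `&&` evaluation on the cheap null check before the more expensive comparisons run.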
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala | 24
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala
index 56a3dd02f9..1e4523e2d8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala
@@ -18,6 +18,8 @@
package org.apache.spark.sql.catalyst.planning
import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.expressions.{And, Expression, IsNotNull, PredicateHelper}
+import org.apache.spark.sql.catalyst.plans
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.trees.TreeNode
@@ -26,8 +28,28 @@ import org.apache.spark.sql.catalyst.trees.TreeNode
* be used for execution. If this strategy does not apply to the give logical operation then an
* empty list should be returned.
*/
-abstract class GenericStrategy[PhysicalPlan <: TreeNode[PhysicalPlan]] extends Logging {
+abstract class GenericStrategy[PhysicalPlan <: TreeNode[PhysicalPlan]]
+ extends PredicateHelper with Logging {
+
def apply(plan: LogicalPlan): Seq[PhysicalPlan]
+
+ // Attempts to re-order the individual conjunctive predicates in an expression to short circuit
+ // the evaluation of relatively cheaper checks (e.g., checking for nullability) before others.
+ protected def reorderPredicates(expr: Expression): Expression = {
+ splitConjunctivePredicates(expr)
+ .sortWith((x, _) => x.isInstanceOf[IsNotNull])
+ .reduce(And)
+ }
+
+ // Wrapper around reorderPredicates(expr: Expression) to reorder optional conditions in joins
+ protected def reorderPredicates(exprOpt: Option[Expression]): Option[Expression] = {
+ exprOpt match {
+ case Some(expr) =>
+ Option(reorderPredicates(expr))
+ case None =>
+ exprOpt
+ }
+ }
}
/**
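For context, a hypothetical call site: a planning strategy could apply `reorderPredicates` to a logical `Filter`'s condition before producing the physical node. The real call sites added by this patch live in `sql/core` and are outside this diffstat; `DemoFilterStrategy` below is illustrative only and returns logical plans so the sketch stays self-contained.

```scala
import org.apache.spark.sql.catalyst.planning.GenericStrategy
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}

// Hypothetical strategy sketch: real strategies in sql/core produce SparkPlan
// nodes, but the reordering call looks the same.
object DemoFilterStrategy extends GenericStrategy[LogicalPlan] {
  override def apply(plan: LogicalPlan): Seq[LogicalPlan] = plan match {
    case Filter(condition, child) =>
      // Evaluate the cheap IsNotNull conjuncts first.
      Filter(reorderPredicates(condition), child) :: Nil
    case _ => Nil
  }
}
```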