From c1f3c27f22c75188fbbc718de771ccdd637e4944 Mon Sep 17 00:00:00 2001 From: scwf Date: Sat, 17 Jan 2015 15:51:24 -0800 Subject: [SPARK-4937][SQL] Comment for the newly optimization rules in `BooleanSimplification` Follow up of #3778 /cc rxin Author: scwf Closes #4086 from scwf/commentforspark-4937 and squashes the following commits: aaf89f6 [scwf] code style issue 2d3406e [scwf] added comment for spark-4937 --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'sql') diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f3acb70e03..522f14b091 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -311,13 +311,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // a && a => a case (l, r) if l fastEquals r => l case (_, _) => + /* Optimize predicates using the formula (a || b) && (a || c) => a || (b && c) + * 1. Split left and right to get the disjunctive predicates, + * e.g. lhsSet = (a, b), rhsSet = (a, c) + * 2. Find the common predicates shared by lhsSet and rhsSet, e.g. common = (a) + * 3. Remove the common predicates from lhsSet and rhsSet, e.g. ldiff = (b), rdiff = (c) + * 4. Apply the formula to get the optimized predicate: common || (ldiff && rdiff) + */ val lhsSet = splitDisjunctivePredicates(left).toSet val rhsSet = splitDisjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) val ldiff = lhsSet.diff(common) val rdiff = rhsSet.diff(common) if (ldiff.size == 0 || rdiff.size == 0) { - // a && (a || b) + // a && (a || b) => a common.reduce(Or) } else { // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ...
=> @@ -339,13 +346,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // a || a => a case (l, r) if l fastEquals r => l case (_, _) => + /* Optimize predicates using the formula (a && b) || (a && c) => a && (b || c) + * 1. Split left and right to get the conjunctive predicates, + * e.g. lhsSet = (a, b), rhsSet = (a, c) + * 2. Find the common predicates shared by lhsSet and rhsSet, e.g. common = (a) + * 3. Remove the common predicates from lhsSet and rhsSet, e.g. ldiff = (b), rdiff = (c) + * 4. Apply the formula to get the optimized predicate: common && (ldiff || rdiff) + */ val lhsSet = splitConjunctivePredicates(left).toSet val rhsSet = splitConjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) val ldiff = lhsSet.diff(common) val rdiff = rhsSet.diff(common) if ( ldiff.size == 0 || rdiff.size == 0) { - // a || (b && a) + // a || (b && a) => a common.reduce(And) } else { // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => -- cgit v1.2.3