diff options
author | scwf <wangfei1@huawei.com> | 2015-01-17 15:51:24 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-01-17 15:51:24 -0800 |
commit | c1f3c27f22c75188fbbc718de771ccdd637e4944 (patch) | |
tree | 6daf7ec833a2503255f302fd84f5e84568fd026e /sql | |
parent | f3bfc768d486295239fd651e1be29273023be6da (diff) | |
download | spark-c1f3c27f22c75188fbbc718de771ccdd637e4944.tar.gz spark-c1f3c27f22c75188fbbc718de771ccdd637e4944.tar.bz2 spark-c1f3c27f22c75188fbbc718de771ccdd637e4944.zip |
[SPARK-4937][SQL] Comments for the new optimization rules in `BooleanSimplification`
Follow up of #3778
/cc rxin
Author: scwf <wangfei1@huawei.com>
Closes #4086 from scwf/commentforspark-4937 and squashes the following commits:
aaf89f6 [scwf] code style issue
2d3406e [scwf] added comment for spark-4937
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 18 |
1 files changed, 16 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f3acb70e03..522f14b091 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -311,13 +311,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // a && a => a case (l, r) if l fastEquals r => l case (_, _) => + /* Optimize predicates using the formula (a || b) && (a || c) => a || (b && c) + * 1. Split left and right to get the disjunctive predicates, + * i.e. lhsSet = (a, b), rhsSet = (a, c) + * 2. Find the common predicates between lhsSet and rhsSet, i.e. common = (a) + * 3. Remove the common predicates from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c) + * 4. Apply the formula to get the optimized predicate: common || (ldiff && rdiff) + */ val lhsSet = splitDisjunctivePredicates(left).toSet val rhsSet = splitDisjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) val ldiff = lhsSet.diff(common) val rdiff = rhsSet.diff(common) if (ldiff.size == 0 || rdiff.size == 0) { - // a && (a || b) + // a && (a || b) => a common.reduce(Or) } else { // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => @@ -339,13 +346,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // a || a => a case (l, r) if l fastEquals r => l case (_, _) => + /* Optimize predicates using the formula (a && b) || (a && c) => a && (b || c) + * 1. Split left and right to get the conjunctive predicates, + * i.e. lhsSet = (a, b), rhsSet = (a, c) + * 2. Find the common predicates between lhsSet and rhsSet, i.e. common = (a) + * 3. Remove the common predicates from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c) + * 4. 
Apply the formula to get the optimized predicate: common && (ldiff || rdiff) + */ val lhsSet = splitConjunctivePredicates(left).toSet val rhsSet = splitConjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) val ldiff = lhsSet.diff(common) val rdiff = rhsSet.diff(common) if ( ldiff.size == 0 || rdiff.size == 0) { - // a || (b && a) + // a || (b && a) => a common.reduce(And) } else { // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => |