aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWenchen Fan <cloud0fan@outlook.com>2015-07-17 16:28:24 -0700
committerMichael Armbrust <michael@databricks.com>2015-07-17 16:28:24 -0700
commitbd903ee89f1d1bc4daf63f1f07958cb86d667e1e (patch)
tree0af20a5817f0318b56b52054e8b3fd12e9a79e23
parentfd6b3101fbb0a8c3ebcf89ce9b4e8664406d9869 (diff)
downloadspark-bd903ee89f1d1bc4daf63f1f07958cb86d667e1e.tar.gz
spark-bd903ee89f1d1bc4daf63f1f07958cb86d667e1e.tar.bz2
spark-bd903ee89f1d1bc4daf63f1f07958cb86d667e1e.zip
[SPARK-9117] [SQL] fix BooleanSimplification in case-insensitive
Author: Wenchen Fan <cloud0fan@outlook.com> Closes #7452 from cloud-fan/boolean-simplify and squashes the following commits: 2a6e692 [Wenchen Fan] fix style d3cfd26 [Wenchen Fan] fix BooleanSimplification in case-insensitive
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala28
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala55
2 files changed, 40 insertions, 43 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index d5beeec0ff..0f28a0d2c8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -393,26 +393,26 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
// (a || b) && (a || c) => a || (b && c)
case _ =>
// 1. Split left and right to get the disjunctive predicates,
- // i.e. lhsSet = (a, b), rhsSet = (a, c)
+ // i.e. lhs = (a, b), rhs = (a, c)
// 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a)
// 3. Remove common predict from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c)
// 4. Apply the formula, get the optimized predicate: common || (ldiff && rdiff)
- val lhsSet = splitDisjunctivePredicates(left).toSet
- val rhsSet = splitDisjunctivePredicates(right).toSet
- val common = lhsSet.intersect(rhsSet)
+ val lhs = splitDisjunctivePredicates(left)
+ val rhs = splitDisjunctivePredicates(right)
+ val common = lhs.filter(e => rhs.exists(e.semanticEquals(_)))
if (common.isEmpty) {
// No common factors, return the original predicate
and
} else {
- val ldiff = lhsSet.diff(common)
- val rdiff = rhsSet.diff(common)
+ val ldiff = lhs.filterNot(e => common.exists(e.semanticEquals(_)))
+ val rdiff = rhs.filterNot(e => common.exists(e.semanticEquals(_)))
if (ldiff.isEmpty || rdiff.isEmpty) {
// (a || b || c || ...) && (a || b) => (a || b)
common.reduce(Or)
} else {
// (a || b || c || ...) && (a || b || d || ...) =>
// ((c || ...) && (d || ...)) || a || b
- (common + And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or)
+ (common :+ And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or)
}
}
} // end of And(left, right)
@@ -431,26 +431,26 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
// (a && b) || (a && c) => a && (b || c)
case _ =>
// 1. Split left and right to get the conjunctive predicates,
- // i.e. lhsSet = (a, b), rhsSet = (a, c)
+ // i.e. lhs = (a, b), rhs = (a, c)
// 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a)
// 3. Remove common predict from lhsSet and rhsSet, i.e. ldiff = (b), rdiff = (c)
// 4. Apply the formula, get the optimized predicate: common && (ldiff || rdiff)
- val lhsSet = splitConjunctivePredicates(left).toSet
- val rhsSet = splitConjunctivePredicates(right).toSet
- val common = lhsSet.intersect(rhsSet)
+ val lhs = splitConjunctivePredicates(left)
+ val rhs = splitConjunctivePredicates(right)
+ val common = lhs.filter(e => rhs.exists(e.semanticEquals(_)))
if (common.isEmpty) {
// No common factors, return the original predicate
or
} else {
- val ldiff = lhsSet.diff(common)
- val rdiff = rhsSet.diff(common)
+ val ldiff = lhs.filterNot(e => common.exists(e.semanticEquals(_)))
+ val rdiff = rhs.filterNot(e => common.exists(e.semanticEquals(_)))
if (ldiff.isEmpty || rdiff.isEmpty) {
// (a && b) || (a && b && c && ...) => a && b
common.reduce(And)
} else {
// (a && b && c && ...) || (a && b && d && ...) =>
// ((c && ...) || (d && ...)) && a && b
- (common + Or(ldiff.reduce(And), rdiff.reduce(And))).reduce(And)
+ (common :+ Or(ldiff.reduce(And), rdiff.reduce(And))).reduce(And)
}
}
} // end of Or(left, right)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
index 465a5e6914..d4916ea8d2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.optimizer
-import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
+import org.apache.spark.sql.catalyst.analysis.{AnalysisSuite, EliminateSubQueries}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -40,29 +40,11 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string)
- // The `foldLeft` is required to handle cases like comparing `a && (b && c)` and `(a && b) && c`
- def compareConditions(e1: Expression, e2: Expression): Boolean = (e1, e2) match {
- case (lhs: And, rhs: And) =>
- val lhsSet = splitConjunctivePredicates(lhs).toSet
- val rhsSet = splitConjunctivePredicates(rhs).toSet
- lhsSet.foldLeft(rhsSet) { (set, e) =>
- set.find(compareConditions(_, e)).map(set - _).getOrElse(set)
- }.isEmpty
-
- case (lhs: Or, rhs: Or) =>
- val lhsSet = splitDisjunctivePredicates(lhs).toSet
- val rhsSet = splitDisjunctivePredicates(rhs).toSet
- lhsSet.foldLeft(rhsSet) { (set, e) =>
- set.find(compareConditions(_, e)).map(set - _).getOrElse(set)
- }.isEmpty
-
- case (l, r) => l == r
- }
-
- def checkCondition(input: Expression, expected: Expression): Unit = {
+ private def checkCondition(input: Expression, expected: Expression): Unit = {
val plan = testRelation.where(input).analyze
- val actual = Optimize.execute(plan).expressions.head
- compareConditions(actual, expected)
+ val actual = Optimize.execute(plan)
+ val correctAnswer = testRelation.where(expected).analyze
+ comparePlans(actual, correctAnswer)
}
test("a && a => a") {
@@ -86,10 +68,8 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
('a === 'b && 'c < 1 && 'a === 5) ||
('a === 'b && 'b < 5 && 'a > 1)
- val expected =
- (((('b > 3) && ('c > 2)) ||
- (('c < 1) && ('a === 5))) ||
- (('b < 5) && ('a > 1))) && ('a === 'b)
+ val expected = 'a === 'b && (
+ ('b > 3 && 'c > 2) || ('c < 1 && 'a === 5) || ('b < 5 && 'a > 1))
checkCondition(input, expected)
}
@@ -101,10 +81,27 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
checkCondition('a < 2 && ('a < 2 || 'a > 3 || 'b > 5) , 'a < 2)
- checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), ('b > 3 && 'c > 5) || 'a < 2)
+ checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), 'a < 2 || ('b > 3 && 'c > 5))
checkCondition(
('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5),
- ('b > 3 && 'a > 3 && 'a < 5) || 'a === 'b)
+ ('a === 'b || 'b > 3 && 'a > 3 && 'a < 5))
+ }
+
+ private def caseInsensitiveAnalyse(plan: LogicalPlan) =
+ AnalysisSuite.caseInsensitiveAnalyzer.execute(plan)
+
+ test("(a && b) || (a && c) => a && (b || c) when case insensitive") {
+ val plan = caseInsensitiveAnalyse(testRelation.where(('a > 2 && 'b > 3) || ('A > 2 && 'b < 5)))
+ val actual = Optimize.execute(plan)
+ val expected = caseInsensitiveAnalyse(testRelation.where('a > 2 && ('b > 3 || 'b < 5)))
+ comparePlans(actual, expected)
+ }
+
+ test("(a || b) && (a || c) => a || (b && c) when case insensitive") {
+ val plan = caseInsensitiveAnalyse(testRelation.where(('a > 2 || 'b > 3) && ('A > 2 || 'b < 5)))
+ val actual = Optimize.execute(plan)
+ val expected = caseInsensitiveAnalyse(testRelation.where('a > 2 || ('b > 3 && 'b < 5)))
+ comparePlans(actual, expected)
}
}