From c632bdc01f51bb253fa3dc258ffa7fdecf814d35 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 22 Mar 2016 10:17:08 -0700 Subject: [SPARK-14029][SQL] Improve BooleanSimplification optimization by implementing `Not` canonicalization. ## What changes were proposed in this pull request? Currently, the **BooleanSimplification** optimization can handle the following cases. * a && (!a || b ) ==> a && b * a && (b || !a ) ==> a && b However, it cannot handle the following cases, because the comparison between their canonicalized forms fails. * a < 1 && (!(a < 1) || b) ==> (a < 1) && b * a <= 1 && (!(a <= 1) || b) ==> (a <= 1) && b * a > 1 && (!(a > 1) || b) ==> (a > 1) && b * a >= 1 && (!(a >= 1) || b) ==> (a >= 1) && b This PR handles the above cases as well as the following ones. * a < 1 && ((a >= 1) || b ) ==> (a < 1) && b * a <= 1 && ((a > 1) || b ) ==> (a <= 1) && b * a > 1 && ((a <= 1) || b) ==> (a > 1) && b * a >= 1 && ((a < 1) || b) ==> (a >= 1) && b ## How was this patch tested? Passes the Jenkins tests, including new test cases in BooleanSimplificationSuite. Author: Dongjoon Hyun Closes #11851 from dongjoon-hyun/SPARK-14029. 
--- .../sql/catalyst/expressions/Canonicalize.scala | 11 +++++++++ .../catalyst/expressions/ExpressionSetSuite.scala | 6 +++++ .../optimizer/BooleanSimplificationSuite.scala | 28 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index ae1f600613..07ba7d5e4a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -71,6 +71,17 @@ object Canonicalize extends { case GreaterThanOrEqual(l, r) if l.hashCode() > r.hashCode() => LessThanOrEqual(r, l) case LessThanOrEqual(l, r) if l.hashCode() > r.hashCode() => GreaterThanOrEqual(r, l) + // Not(l OP r) becomes the complementary comparison. When the operands are also swapped + // into hash order, the complement must be mirrored too, e.g. !(l > r) is (r >= l). + case Not(GreaterThan(l, r)) if l.hashCode() > r.hashCode() => GreaterThanOrEqual(r, l) + case Not(GreaterThan(l, r)) => LessThanOrEqual(l, r) + case Not(LessThan(l, r)) if l.hashCode() > r.hashCode() => LessThanOrEqual(r, l) + case Not(LessThan(l, r)) => GreaterThanOrEqual(l, r) + case Not(GreaterThanOrEqual(l, r)) if l.hashCode() > r.hashCode() => GreaterThan(r, l) + case Not(GreaterThanOrEqual(l, r)) => LessThan(l, r) + case Not(LessThanOrEqual(l, r)) if l.hashCode() > r.hashCode() => LessThan(r, l) + case Not(LessThanOrEqual(l, r)) => GreaterThan(l, r) + case _ => e } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala index 0b350c6a98..60939ee0ed 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala @@ -74,6 +74,12 @@ class ExpressionSetSuite extends SparkFunSuite { setTest(1, aUpper > bUpper, 
bUpper < aUpper) setTest(1, aUpper >= bUpper, bUpper <= aUpper) + // `Not` canonicalization + setTest(1, Not(aUpper > 1), aUpper <= 1, Not(Literal(1) < aUpper), Literal(1) >= aUpper) + setTest(1, Not(aUpper < 1), aUpper >= 1, Not(Literal(1) > aUpper), Literal(1) <= aUpper) + setTest(1, Not(aUpper >= 1), aUpper < 1, Not(Literal(1) <= aUpper), Literal(1) > aUpper) + setTest(1, Not(aUpper <= 1), aUpper > 1, Not(Literal(1) >= aUpper), Literal(1) < aUpper) + test("add to / remove from set") { val initialSet = ExpressionSet(aUpper + 1 :: Nil) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 47b79fe462..2ab31eea8a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -99,6 +99,34 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { checkCondition(('b || !'a ) && 'a, 'b && 'a) } + test("a < 1 && (!(a < 1) || b)") { + checkCondition('a < 1 && (!('a < 1) || 'b), ('a < 1) && 'b) + checkCondition('a < 1 && ('b || !('a < 1)), ('a < 1) && 'b) + + checkCondition('a <= 1 && (!('a <= 1) || 'b), ('a <= 1) && 'b) + checkCondition('a <= 1 && ('b || !('a <= 1)), ('a <= 1) && 'b) + + checkCondition('a > 1 && (!('a > 1) || 'b), ('a > 1) && 'b) + checkCondition('a > 1 && ('b || !('a > 1)), ('a > 1) && 'b) + + checkCondition('a >= 1 && (!('a >= 1) || 'b), ('a >= 1) && 'b) + checkCondition('a >= 1 && ('b || !('a >= 1)), ('a >= 1) && 'b) + } + + test("a < 1 && ((a >= 1) || b)") { + checkCondition('a < 1 && ('a >= 1 || 'b ), ('a < 1) && 'b) + checkCondition('a < 1 && ('b || 'a >= 1), ('a < 1) && 'b) + + checkCondition('a <= 1 && ('a > 1 || 'b ), ('a <= 1) && 'b) + checkCondition('a <= 1 && ('b || 'a > 1), ('a <= 1) && 'b) + + 
checkCondition('a > 1 && (('a <= 1) || 'b), ('a > 1) && 'b) + checkCondition('a > 1 && ('b || ('a <= 1)), ('a > 1) && 'b) + + checkCondition('a >= 1 && (('a < 1) || 'b), ('a >= 1) && 'b) + checkCondition('a >= 1 && ('b || ('a < 1)), ('a >= 1) && 'b) + } + test("DeMorgan's law") { checkCondition(!('a && 'b), !'a || !'b) -- cgit v1.2.3