aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <viirya@gmail.com>2016-10-11 16:06:40 +0800
committerWenchen Fan <wenchen@databricks.com>2016-10-11 16:06:40 +0800
commitc8c090640ab73624841d0f4abcfd7409a0838725 (patch)
tree91c17aab5210b758998177e04f67bb0f6eea2452
parent3694ba48f0db0f47baea4b005cdeef3f454b7329 (diff)
downloadspark-c8c090640ab73624841d0f4abcfd7409a0838725.tar.gz
spark-c8c090640ab73624841d0f4abcfd7409a0838725.tar.bz2
spark-c8c090640ab73624841d0f4abcfd7409a0838725.zip
[SPARK-17821][SQL] Support And and Or in Expression Canonicalize
## What changes were proposed in this pull request? Currently `Canonicalize` object doesn't support `And` and `Or`. So we can compare canonicalized form of predicates consistently. We should add the support. ## How was this patch tested? Jenkins tests. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #15388 from viirya/canonicalize-and-or.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala7
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala82
2 files changed, 89 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
index 07ba7d5e4a..e876450c73 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
@@ -62,6 +62,13 @@ object Canonicalize extends {
case a: Add => orderCommutative(a, { case Add(l, r) => Seq(l, r) }).reduce(Add)
case m: Multiply => orderCommutative(m, { case Multiply(l, r) => Seq(l, r) }).reduce(Multiply)
+ case o: Or =>
+ orderCommutative(o, { case Or(l, r) if l.deterministic && r.deterministic => Seq(l, r) })
+ .reduce(Or)
+ case a: And =>
+ orderCommutative(a, { case And(l, r) if l.deterministic && r.deterministic => Seq(l, r)})
+ .reduce(And)
+
case EqualTo(l, r) if l.hashCode() > r.hashCode() => EqualTo(r, l)
case EqualNullSafe(l, r) if l.hashCode() > r.hashCode() => EqualNullSafe(r, l)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
index 60939ee0ed..c587d4f632 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
@@ -80,6 +80,88 @@ class ExpressionSetSuite extends SparkFunSuite {
setTest(1, Not(aUpper >= 1), aUpper < 1, Not(Literal(1) <= aUpper), Literal(1) > aUpper)
setTest(1, Not(aUpper <= 1), aUpper > 1, Not(Literal(1) >= aUpper), Literal(1) < aUpper)
+ // Reordering AND/OR expressions
+ setTest(1, aUpper > bUpper && aUpper <= 10, aUpper <= 10 && aUpper > bUpper)
+ setTest(1,
+ aUpper > bUpper && bUpper > 100 && aUpper <= 10,
+ bUpper > 100 && aUpper <= 10 && aUpper > bUpper)
+
+ setTest(1, aUpper > bUpper || aUpper <= 10, aUpper <= 10 || aUpper > bUpper)
+ setTest(1,
+ aUpper > bUpper || bUpper > 100 || aUpper <= 10,
+ bUpper > 100 || aUpper <= 10 || aUpper > bUpper)
+
+ setTest(1,
+ (aUpper <= 10 && aUpper > bUpper) || bUpper > 100,
+ bUpper > 100 || (aUpper <= 10 && aUpper > bUpper))
+
+ setTest(1,
+ aUpper >= bUpper || (aUpper > 10 && bUpper < 10),
+ (bUpper < 10 && aUpper > 10) || aUpper >= bUpper)
+
+ // More complicated cases mixing AND/OR
+ // Three predicates in the following:
+ // (bUpper > 100)
+ // (aUpper < 100 && bUpper <= aUpper)
+ // (aUpper >= 10 && bUpper >= 50)
+ // They can be reordered and the sub-predicates contained in each of them can be reordered too.
+ setTest(1,
+ (bUpper > 100) || (aUpper < 100 && bUpper <= aUpper) || (aUpper >= 10 && bUpper >= 50),
+ (aUpper >= 10 && bUpper >= 50) || (bUpper > 100) || (aUpper < 100 && bUpper <= aUpper),
+ (bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100) || (bUpper > 100))
+
+ // Two predicates in the following:
+ // (bUpper > 100 && aUpper < 100 && bUpper <= aUpper)
+ // (aUpper >= 10 && bUpper >= 50)
+ setTest(1,
+ (bUpper > 100 && aUpper < 100 && bUpper <= aUpper) || (aUpper >= 10 && bUpper >= 50),
+ (aUpper >= 10 && bUpper >= 50) || (aUpper < 100 && bUpper > 100 && bUpper <= aUpper),
+ (bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100 && bUpper > 100))
+
+ // Three predicates in the following:
+ // (aUpper >= 10)
+ // (bUpper <= 10 && aUpper === bUpper && aUpper < 100)
+ // (bUpper >= 100)
+ setTest(1,
+ (aUpper >= 10) || (bUpper <= 10 && aUpper === bUpper && aUpper < 100) || (bUpper >= 100),
+ (aUpper === bUpper && aUpper < 100 && bUpper <= 10) || (bUpper >= 100) || (aUpper >= 10),
+ (aUpper < 100 && bUpper <= 10 && aUpper === bUpper) || (aUpper >= 10) || (bUpper >= 100),
+ ((bUpper <= 10 && aUpper === bUpper) && aUpper < 100) || ((aUpper >= 10) || (bUpper >= 100)))
+
+ // Don't reorder non-deterministic expression in AND/OR.
+ setTest(2, Rand(1L) > aUpper && aUpper <= 10, aUpper <= 10 && Rand(1L) > aUpper)
+ setTest(2,
+ aUpper > bUpper && bUpper > 100 && Rand(1L) > aUpper,
+ bUpper > 100 && Rand(1L) > aUpper && aUpper > bUpper)
+
+ setTest(2, Rand(1L) > aUpper || aUpper <= 10, aUpper <= 10 || Rand(1L) > aUpper)
+ setTest(2,
+ aUpper > bUpper || aUpper <= Rand(1L) || aUpper <= 10,
+ aUpper <= Rand(1L) || aUpper <= 10 || aUpper > bUpper)
+
+ // Partial reorder case: we don't reorder non-deterministic expressions,
+ // but we can reorder sub-expressions in deterministic AND/OR expressions.
+ // There are two predicates:
+ // (aUpper > bUpper || bUpper > 100) => we can reorder sub-expressions in it.
+ // (aUpper === Rand(1L))
+ setTest(1,
+ (aUpper > bUpper || bUpper > 100) && aUpper === Rand(1L),
+ (bUpper > 100 || aUpper > bUpper) && aUpper === Rand(1L))
+
+ // There are three predicates:
+ // (Rand(1L) > aUpper)
+ // (aUpper <= Rand(1L) && aUpper > bUpper)
+ // (aUpper > 10 && bUpper > 10) => we can reorder sub-expressions in it.
+ setTest(1,
+ Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (aUpper > 10 && bUpper > 10),
+ Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (bUpper > 10 && aUpper > 10))
+
+ // Same predicates as above, but a negative case when we reorder non-deterministic
+ // expression in (aUpper <= Rand(1L) && aUpper > bUpper).
+ setTest(2,
+ Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (aUpper > 10 && bUpper > 10),
+ Rand(1L) > aUpper || (aUpper > bUpper && aUpper <= Rand(1L)) || (aUpper > 10 && bUpper > 10))
+
test("add to / remove from set") {
val initialSet = ExpressionSet(aUpper + 1 :: Nil)