author    Zhongshuai Pei <799203320@qq.com>  2015-06-11 17:01:02 -0700
committer Michael Armbrust <michael@databricks.com>  2015-06-11 17:01:07 -0700
commit    7914c720bf7447e8c9d96d564eafd6b687d2fc1a (patch)
tree      48d07c10e8080501f65c9bebf26a9ab4dad5db74
parent    7d669a56ffc7a4f5827830ef3c27d45cc0e8774f (diff)
[SPARK-7824] [SQL] Collapse operator reordering and constant folding into a single batch.
SQL:
```
select * from tableA join tableB on (a > 3 and b = d) or (a > 3 and b = e)
```
Plan before the change:
```
== Optimized Logical Plan ==
Project [a#293,b#294,c#295,d#296,e#297]
 Join Inner, Some(((a#293 > 3) && ((b#294 = d#296) || (b#294 = e#297))))
  MetastoreRelation default, tablea, None
  MetastoreRelation default, tableb, None
```
Plan after the change:
```
== Optimized Logical Plan ==
Project [a#293,b#294,c#295,d#296,e#297]
 Join Inner, Some(((b#294 = d#296) || (b#294 = e#297)))
  Filter (a#293 > 3)
   MetastoreRelation default, tablea, None
  MetastoreRelation default, tableb, None
```
CombineLimits rewrites nested limits into Limit(If(LessThan(ne, le), ne, le), grandChild), and LessThan is handled in BooleanSimplification, so CombineLimits must come before BooleanSimplification, and BooleanSimplification must come before PushPredicateThroughJoin.

Author: Zhongshuai Pei <799203320@qq.com>
Author: DoingDone9 <799203320@qq.com>

Closes #6351 from DoingDone9/master and squashes the following commits:

20de7be [Zhongshuai Pei] Update Optimizer.scala
7bc7d28 [Zhongshuai Pei] Merge pull request #17 from apache/master
0ba5f42 [Zhongshuai Pei] Update Optimizer.scala
f8b9314 [Zhongshuai Pei] Update FilterPushdownSuite.scala
c529d9f [Zhongshuai Pei] Update FilterPushdownSuite.scala
ae3af6d [Zhongshuai Pei] Update FilterPushdownSuite.scala
a04ffae [Zhongshuai Pei] Update Optimizer.scala
11beb61 [Zhongshuai Pei] Update FilterPushdownSuite.scala
f2ee5fe [Zhongshuai Pei] Update Optimizer.scala
be6b1d5 [Zhongshuai Pei] Update Optimizer.scala
b01e622 [Zhongshuai Pei] Merge pull request #15 from apache/master
8df716a [Zhongshuai Pei] Update FilterPushdownSuite.scala
d98bc35 [Zhongshuai Pei] Update FilterPushdownSuite.scala
fa65718 [Zhongshuai Pei] Update Optimizer.scala
ab8e9a6 [Zhongshuai Pei] Merge pull request #14 from apache/master
14952e2 [Zhongshuai Pei] Merge pull request #13 from apache/master
f03fe7f [Zhongshuai Pei] Merge pull request #12 from apache/master
f12fa50 [Zhongshuai Pei] Merge pull request #10 from apache/master
f61210c [Zhongshuai Pei] Merge pull request #9 from apache/master
34b1a9a [Zhongshuai Pei] Merge pull request #8 from apache/master
802261c [DoingDone9] Merge pull request #7 from apache/master
d00303b [DoingDone9] Merge pull request #6 from apache/master
98b134f [DoingDone9] Merge pull request #5 from apache/master
161cae3 [DoingDone9] Merge pull request #4 from apache/master
c87e8b6 [DoingDone9] Merge pull request #3 from apache/master
cb1852d [DoingDone9] Merge pull request #2 from apache/master
c3f046f [DoingDone9] Merge pull request #1 from apache/master
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala            7
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala  31
2 files changed, 27 insertions, 11 deletions
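For illustration, the following is a minimal sketch of the rewrite described in the commit message: factoring the common conjunct out of `(a > 3 and b = d) or (a > 3 and b = e)` so that `a > 3` can then be pushed below the join as a Filter. These are not Catalyst's actual expression classes or the BooleanSimplification rule; `Expr`, `Pred`, and `factorCommonConjunct` are names invented for this example.

```scala
// Toy expression tree; stand-ins for Catalyst's Expression, And, Or (names are hypothetical).
sealed trait Expr
case class Pred(sql: String) extends Expr
case class And(left: Expr, right: Expr) extends Expr
case class Or(left: Expr, right: Expr) extends Expr

object FactorSketch {
  // (x && y) || (x && z)  ==>  x && (y || z): once the common conjunct x stands alone,
  // a pushdown rule can move it below the join as a single-table Filter.
  def factorCommonConjunct(e: Expr): Expr = e match {
    case Or(And(x1, y), And(x2, z)) if x1 == x2 => And(x1, Or(y, z))
    case other => other
  }

  def main(args: Array[String]): Unit = {
    val joinCondition =
      Or(And(Pred("a > 3"), Pred("b = d")),
         And(Pred("a > 3"), Pred("b = e")))
    // Prints: And(Pred(a > 3),Or(Pred(b = d),Pred(b = e)))
    println(factorCommonConjunct(joinCondition))
  }
}
```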
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index c16f08d389..f8f1efcc7e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -38,21 +38,20 @@ object DefaultOptimizer extends Optimizer {
       EliminateSubQueries) ::
     Batch("Distinct", FixedPoint(100),
       ReplaceDistinctWithAggregate) ::
-    Batch("Operator Reordering", FixedPoint(100),
+    Batch("Operator Optimizations", FixedPoint(100),
       UnionPushdown,
       CombineFilters,
       PushPredicateThroughProject,
-      PushPredicateThroughJoin,
       PushPredicateThroughGenerate,
       ColumnPruning,
       ProjectCollapsing,
-      CombineLimits) ::
-    Batch("ConstantFolding", FixedPoint(100),
+      CombineLimits,
       NullPropagation,
       OptimizeIn,
       ConstantFolding,
       LikeSimplification,
       BooleanSimplification,
+      PushPredicateThroughJoin,
       SimplifyFilters,
       SimplifyCasts,
       SimplifyCaseConversionExpressions) ::
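As a rough illustration of why merging the two batches matters, here is a minimal fixed-point batch runner. This is a sketch only, not Catalyst's RuleExecutor: rules in the same FixedPoint batch are reapplied until the plan stops changing, so a simplification rule such as BooleanSimplification and a pushdown rule such as PushPredicateThroughJoin can feed each other, whereas rules split across batches only ever see the finished output of the earlier batch.

```scala
// A hypothetical fixed-point batch runner; Plan and Rule are stand-ins for
// LogicalPlan and Rule[LogicalPlan].
object FixedPointBatchSketch {
  type Plan = String
  type Rule = Plan => Plan

  // Apply the rules in order, repeatedly, until the plan stops changing or the
  // iteration limit (FixedPoint(100) in DefaultOptimizer) is reached.
  def runBatch(rules: Seq[Rule], maxIterations: Int)(plan: Plan): Plan = {
    var current = plan
    var iterations = 0
    var changed = true
    while (changed && iterations < maxIterations) {
      val next = rules.foldLeft(current)((p, rule) => rule(p))
      changed = next != current
      current = next
      iterations += 1
    }
    current
  }
}
```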
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
index 17dc912474..ffdc673cdc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
@@ -36,6 +36,7 @@ class FilterPushdownSuite extends PlanTest {
       Batch("Filter Pushdown", Once,
         CombineFilters,
         PushPredicateThroughProject,
+        BooleanSimplification,
         PushPredicateThroughJoin,
         PushPredicateThroughGenerate,
         ColumnPruning,
@@ -156,11 +157,9 @@ class FilterPushdownSuite extends PlanTest {
         .where('a === 1 && 'a === 2)
         .select('a).analyze
-
     comparePlans(optimized, correctAnswer)
   }
-
   test("joins: push to either side") {
     val x = testRelation.subquery('x)
     val y = testRelation.subquery('y)
@@ -198,6 +197,25 @@ class FilterPushdownSuite extends PlanTest {
     comparePlans(optimized, correctAnswer)
   }
+  test("joins: push to one side after transformCondition") {
+    val x = testRelation.subquery('x)
+    val y = testRelation1.subquery('y)
+
+    val originalQuery = {
+      x.join(y)
+       .where(("x.a".attr === 1 && "y.d".attr === "x.b".attr) ||
+              ("x.a".attr === 1 && "y.d".attr === "x.c".attr))
+    }
+
+    val optimized = Optimize.execute(originalQuery.analyze)
+    val left = testRelation.where('a === 1)
+    val right = testRelation1
+    val correctAnswer =
+      left.join(right, condition = Some("d".attr === "b".attr || "d".attr === "c".attr)).analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
   test("joins: rewrite filter to push to either side") {
     val x = testRelation.subquery('x)
     val y = testRelation.subquery('y)
@@ -563,17 +581,16 @@ class FilterPushdownSuite extends PlanTest {
     // push down invalid
     val originalQuery1 = {
       x.select('a, 'b)
-        .sortBy(SortOrder('a, Ascending))
-        .select('b)
+       .sortBy(SortOrder('a, Ascending))
+       .select('b)
     }
     val optimized1 = Optimize.execute(originalQuery1.analyze)
     val correctAnswer1 =
       x.select('a, 'b)
-        .sortBy(SortOrder('a, Ascending))
-        .select('b).analyze
+       .sortBy(SortOrder('a, Ascending))
+       .select('b).analyze
     comparePlans(optimized1, analysis.EliminateSubQueries(correctAnswer1))
-
   }
 }
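To see the effect end to end, a spark-shell session along these lines (Spark 1.4-era SQLContext API; the table and column names are assumptions) should show the factored-out predicate as a Filter beneath the Join in the optimized logical plan:

```scala
// Assumes tableA(a, b, c) and tableB(d, e) are already registered; names are hypothetical.
val df = sqlContext.sql(
  """SELECT * FROM tableA JOIN tableB
    |ON (a > 3 AND b = d) OR (a > 3 AND b = e)""".stripMargin)

// Prints the parsed, analyzed, and optimized logical plans plus the physical plan.
df.explain(true)

// Or inspect the optimized logical plan directly.
println(df.queryExecution.optimizedPlan)
```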