diff options
author | gatorsmile <gatorsmile@gmail.com> | 2016-03-21 10:34:54 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-03-21 10:34:54 +0800 |
commit | f58319a24fd5e026411538b1fb7336d9d894277b (patch) | |
tree | c109f1f3e23cacf6f0cb4f0d2c78088f5514f531 | |
parent | 454a00df2a43176cb774cad7277934a775618db1 (diff) | |
download | spark-f58319a24fd5e026411538b1fb7336d9d894277b.tar.gz spark-f58319a24fd5e026411538b1fb7336d9d894277b.tar.bz2 spark-f58319a24fd5e026411538b1fb7336d9d894277b.zip |
[SPARK-14019][SQL] Remove noop SortOrder in Sort
#### What changes were proposed in this pull request?
This PR is to add a new Optimizer rule for pruning Sort if its SortOrder is no-op. In the phase of **Optimizer**, if a specific `SortOrder` does not have any reference, it has no effect on the sorting results. If `Sort` is empty, remove the whole `Sort`.
For example, in the following SQL query
```SQL
SELECT * FROM t ORDER BY NULL + 5
```
Before the fix, the plan is like
```
== Analyzed Logical Plan ==
a: int, b: int
Sort [(cast(null as int) + 5) ASC], true
+- Project [a#92,b#93]
+- SubqueryAlias t
+- Project [_1#89 AS a#92,_2#90 AS b#93]
+- LocalRelation [_1#89,_2#90], [[1,2],[1,2]]
== Optimized Logical Plan ==
Sort [null ASC], true
+- LocalRelation [a#92,b#93], [[1,2],[1,2]]
== Physical Plan ==
WholeStageCodegen
: +- Sort [null ASC], true, 0
: +- INPUT
+- Exchange rangepartitioning(null ASC, 5), None
+- LocalTableScan [a#92,b#93], [[1,2],[1,2]]
```
After the fix, the plan is like
```
== Analyzed Logical Plan ==
a: int, b: int
Sort [(cast(null as int) + 5) ASC], true
+- Project [a#92,b#93]
+- SubqueryAlias t
+- Project [_1#89 AS a#92,_2#90 AS b#93]
+- LocalRelation [_1#89,_2#90], [[1,2],[1,2]]
== Optimized Logical Plan ==
LocalRelation [a#92,b#93], [[1,2],[1,2]]
== Physical Plan ==
LocalTableScan [a#92,b#93], [[1,2],[1,2]]
```
cc rxin cloud-fan marmbrus Thanks!
#### How was this patch tested?
Added a test suite for covering this rule
Author: gatorsmile <gatorsmile@gmail.com>
Closes #11840 from gatorsmile/sortElimination.
2 files changed, 78 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index c419b5fd22..41e8dc0f46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -87,6 +87,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] { SimplifyConditionals, RemoveDispensableExpressions, PruneFilters, + EliminateSorts, SimplifyCasts, SimplifyCaseConversionExpressions, EliminateSerialization) :: @@ -826,6 +827,17 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { } /** + * Removes no-op SortOrder from Sort + */ +object EliminateSorts extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case s @ Sort(orders, _, child) if orders.isEmpty || orders.exists(_.child.foldable) => + val newOrders = orders.filterNot(_.child.foldable) + if (newOrders.isEmpty) child else s.copy(order = newOrders) + } +} + +/** * Removes filters that can be evaluated trivially. This can be done through the following ways: * 1) by eliding the filter for cases where it will always evaluate to `true`. * 2) by substituting a dummy empty relation when the filter will always evaluate to `false`. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala new file mode 100644 index 0000000000..27c15e856a --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ + +class EliminateSortsSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Eliminate Sorts", Once, + EliminateSorts) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + + test("Empty order by clause") { + val x = testRelation + + val query = x.orderBy() + val optimized = Optimize.execute(query.analyze) + val correctAnswer = x.analyze + + comparePlans(optimized, correctAnswer) + } + + test("All the SortOrder are no-op") { + val x = testRelation + + val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending)) + val optimized = Optimize.execute(query.analyze) + val correctAnswer = x.analyze + + comparePlans(optimized, correctAnswer) + } + + test("Partial order-by clauses contain no-op SortOrder") { + val x = testRelation + + val query = x.orderBy(SortOrder(3, Ascending), 'a.asc) + val optimized = Optimize.execute(query.analyze) + val correctAnswer = x.orderBy('a.asc).analyze + + comparePlans(optimized, correctAnswer) + } +} |