aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-03-21 10:34:54 +0800
committerWenchen Fan <wenchen@databricks.com>2016-03-21 10:34:54 +0800
commitf58319a24fd5e026411538b1fb7336d9d894277b (patch)
treec109f1f3e23cacf6f0cb4f0d2c78088f5514f531 /sql/catalyst
parent454a00df2a43176cb774cad7277934a775618db1 (diff)
downloadspark-f58319a24fd5e026411538b1fb7336d9d894277b.tar.gz
spark-f58319a24fd5e026411538b1fb7336d9d894277b.tar.bz2
spark-f58319a24fd5e026411538b1fb7336d9d894277b.zip
[SPARK-14019][SQL] Remove noop SortOrder in Sort
#### What changes were proposed in this pull request? This PR is to add a new Optimizer rule for pruning Sort if its SortOrder is no-op. In the phase of **Optimizer**, if a specific `SortOrder` does not have any reference, it has no effect on the sorting results. If `Sort` is empty, remove the whole `Sort`. For example, in the following SQL query ```SQL SELECT * FROM t ORDER BY NULL + 5 ``` Before the fix, the plan is like ``` == Analyzed Logical Plan == a: int, b: int Sort [(cast(null as int) + 5) ASC], true +- Project [a#92,b#93] +- SubqueryAlias t +- Project [_1#89 AS a#92,_2#90 AS b#93] +- LocalRelation [_1#89,_2#90], [[1,2],[1,2]] == Optimized Logical Plan == Sort [null ASC], true +- LocalRelation [a#92,b#93], [[1,2],[1,2]] == Physical Plan == WholeStageCodegen : +- Sort [null ASC], true, 0 : +- INPUT +- Exchange rangepartitioning(null ASC, 5), None +- LocalTableScan [a#92,b#93], [[1,2],[1,2]] ``` After the fix, the plan is like ``` == Analyzed Logical Plan == a: int, b: int Sort [(cast(null as int) + 5) ASC], true +- Project [a#92,b#93] +- SubqueryAlias t +- Project [_1#89 AS a#92,_2#90 AS b#93] +- LocalRelation [_1#89,_2#90], [[1,2],[1,2]] == Optimized Logical Plan == LocalRelation [a#92,b#93], [[1,2],[1,2]] == Physical Plan == LocalTableScan [a#92,b#93], [[1,2],[1,2]] ``` cc rxin cloud-fan marmbrus Thanks! #### How was this patch tested? Added a test suite for covering this rule Author: gatorsmile <gatorsmile@gmail.com> Closes #11840 from gatorsmile/sortElimination.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala12
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala66
2 files changed, 78 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index c419b5fd22..41e8dc0f46 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -87,6 +87,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] {
SimplifyConditionals,
RemoveDispensableExpressions,
PruneFilters,
+ EliminateSorts,
SimplifyCasts,
SimplifyCaseConversionExpressions,
EliminateSerialization) ::
@@ -826,6 +827,17 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper {
}
/**
+ * Removes no-op SortOrder from Sort
+ */
+object EliminateSorts extends Rule[LogicalPlan] {
+ def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+ case s @ Sort(orders, _, child) if orders.isEmpty || orders.exists(_.child.foldable) =>
+ val newOrders = orders.filterNot(_.child.foldable)
+ if (newOrders.isEmpty) child else s.copy(order = newOrders)
+ }
+}
+
+/**
* Removes filters that can be evaluated trivially. This can be done through the following ways:
* 1) by eliding the filter for cases where it will always evaluate to `true`.
* 2) by substituting a dummy empty relation when the filter will always evaluate to `false`.
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
new file mode 100644
index 0000000000..27c15e856a
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules._
+
+class EliminateSortsSuite extends PlanTest {
+
+ object Optimize extends RuleExecutor[LogicalPlan] {
+ val batches =
+ Batch("Eliminate Sorts", Once,
+ EliminateSorts) :: Nil
+ }
+
+ val testRelation = LocalRelation('a.int, 'b.int, 'c.int)
+
+ test("Empty order by clause") {
+ val x = testRelation
+
+ val query = x.orderBy()
+ val optimized = Optimize.execute(query.analyze)
+ val correctAnswer = x.analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+
+ test("All the SortOrder are no-op") {
+ val x = testRelation
+
+ val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
+ val optimized = Optimize.execute(query.analyze)
+ val correctAnswer = x.analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+
+ test("Partial order-by clauses contain no-op SortOrder") {
+ val x = testRelation
+
+ val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
+ val optimized = Optimize.execute(query.analyze)
+ val correctAnswer = x.orderBy('a.asc).analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+}