aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-03-09 10:29:27 -0800
committerMichael Armbrust <michael@databricks.com>2016-03-09 10:29:27 -0800
commit23369c3bd2c6a6d7a2b9d1396d6962022676cee7 (patch)
treed6deafc09628349c4ba0c3aad16b7519fd61ef41 /sql
parent256704c771d301700af9ebf0d180c1ba7c4116c0 (diff)
downloadspark-23369c3bd2c6a6d7a2b9d1396d6962022676cee7.tar.gz
spark-23369c3bd2c6a6d7a2b9d1396d6962022676cee7.tar.bz2
spark-23369c3bd2c6a6d7a2b9d1396d6962022676cee7.zip
[SPARK-13763][SQL] Remove Project when its Child's Output is Nil
#### What changes were proposed in this pull request?

As shown in another PR: https://github.com/apache/spark/pull/11596, we are using `SELECT 1` as a dummy table when the table is used for SQL statements in which a table reference is required, but the contents of the table are not important. For example,

```SQL
SELECT value FROM (select 1) dummyTable Lateral View explode(array(1,2,3)) adTable as value
```

Before the PR, the optimized plan contains a useless `Project` after the Optimizer executes the `ColumnPruning` rule, as shown below:

```
== Analyzed Logical Plan ==
value: int
Project [value#22]
+- Generate explode(array(1, 2, 3)), true, false, Some(adtable), [value#22]
   +- SubqueryAlias dummyTable
      +- Project [1 AS 1#21]
         +- OneRowRelation$

== Optimized Logical Plan ==
Generate explode([1,2,3]), false, false, Some(adtable), [value#22]
+- Project
   +- OneRowRelation$
```

After the fix, the optimized plan no longer contains the useless `Project`, as shown below:

```
== Optimized Logical Plan ==
Generate explode([1,2,3]), false, false, Some(adtable), [value#22]
+- OneRowRelation$
```

This PR removes `Project` when its child's output is Nil.

#### How was this patch tested?

Added a new unit test case into the suite `ColumnPruningSuite.scala`.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #11599 from gatorsmile/projectOneRowRelation.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala6
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala16
2 files changed, 19 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 7455e68ee8..586bf3d4dd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -381,12 +381,12 @@ object ColumnPruning extends Rule[LogicalPlan] {
p
}
- // Can't prune the columns on LeafNode
- case p @ Project(_, l: LeafNode) => p
-
// Eliminate no-op Projects
case p @ Project(projectList, child) if sameOutput(child.output, p.output) => child
+ // Can't prune the columns on LeafNode
+ case p @ Project(_, l: LeafNode) => p
+
// for all other logical plans that inherits the output from it's children
case p @ Project(_, child) =>
val required = child.references ++ p.references
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
index d09601e034..409e92238e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
@@ -157,6 +157,22 @@ class ColumnPruningSuite extends PlanTest {
comparePlans(Optimize.execute(query), expected)
}
+ test("Eliminate the Project with an empty projectList") {
+ val input = OneRowRelation
+ val expected = Project(Literal(1).as("1") :: Nil, input).analyze
+
+ val query1 =
+ Project(Literal(1).as("1") :: Nil, Project(Literal(1).as("1") :: Nil, input)).analyze
+ comparePlans(Optimize.execute(query1), expected)
+
+ val query2 =
+ Project(Literal(1).as("1") :: Nil, Project(Nil, input)).analyze
+ comparePlans(Optimize.execute(query2), expected)
+
+ // to make sure the top Project will not be removed.
+ comparePlans(Optimize.execute(expected), expected)
+ }
+
test("column pruning for group") {
val testRelation = LocalRelation('a.int, 'b.int, 'c.int)
val originalQuery =