diff options
author | Liang-Chi Hsieh <viirya@gmail.com> | 2016-03-03 00:06:46 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-03-03 00:06:46 -0800 |
commit | 7b25dc7b7e5a098552c0d640eee132b83d42db56 (patch) | |
tree | 205ce3b1300420b6d28ca6fb39d4985cc5ad6bdc /sql/core/src | |
parent | 1085bd862a3f69e14e00cec11077e54ab153098b (diff) | |
download | spark-7b25dc7b7e5a098552c0d640eee132b83d42db56.tar.gz spark-7b25dc7b7e5a098552c0d640eee132b83d42db56.tar.bz2 spark-7b25dc7b7e5a098552c0d640eee132b83d42db56.zip |
[SPARK-13466] [SQL] Remove projects that become redundant after column pruning rule
JIRA: https://issues.apache.org/jira/browse/SPARK-13466
## What changes were proposed in this pull request?
With column pruning rule in optimizer, some Project operators will become redundant. We should remove these redundant Projects.
For an example query:
val input = LocalRelation('key.int, 'value.string)
val query =
Project(Seq($"x.key", $"y.key"),
Join(
SubqueryAlias("x", input),
BroadcastHint(SubqueryAlias("y", input)), Inner, None))
After the first run of column pruning, it would look like:
Project(Seq($"x.key", $"y.key"),
Join(
Project(Seq($"x.key"), SubqueryAlias("x", input)),
Project(Seq($"y.key"), <-- inserted by the rule
BroadcastHint(SubqueryAlias("y", input))),
Inner, None))
Actually we don't need the outside Project now. This patch will remove it:
Join(
Project(Seq($"x.key"), SubqueryAlias("x", input)),
Project(Seq($"y.key"),
BroadcastHint(SubqueryAlias("y", input))),
Inner, None)
## How was this patch tested?
Unit test is added into ColumnPruningSuite.
Author: Liang-Chi Hsieh <viirya@gmail.com>
Closes #11341 from viirya/remove-redundant-project.
Diffstat (limited to 'sql/core/src')
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 5b4f6f1d24..f754acb761 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -172,7 +172,7 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext { val df = sqlContext.sql( "SELECT * FROM testData2 JOIN testDataForJoin ON testData2.a = testDataForJoin.a") testSparkPlanMetrics(df, 1, Map( - 1L -> ("SortMergeJoin", Map( + 0L -> ("SortMergeJoin", Map( // It's 4 because we only read 3 rows in the first partition and 1 row in the second one "number of output rows" -> 4L))) ) @@ -190,7 +190,7 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext { val df = sqlContext.sql( "SELECT * FROM testData2 left JOIN testDataForJoin ON testData2.a = testDataForJoin.a") testSparkPlanMetrics(df, 1, Map( - 1L -> ("SortMergeOuterJoin", Map( + 0L -> ("SortMergeOuterJoin", Map( // It's 4 because we only read 3 rows in the first partition and 1 row in the second one "number of output rows" -> 8L))) ) @@ -198,7 +198,7 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext { val df2 = sqlContext.sql( "SELECT * FROM testDataForJoin right JOIN testData2 ON testData2.a = testDataForJoin.a") testSparkPlanMetrics(df2, 1, Map( - 1L -> ("SortMergeOuterJoin", Map( + 0L -> ("SortMergeOuterJoin", Map( // It's 4 because we only read 3 rows in the first partition and 1 row in the second one "number of output rows" -> 8L))) ) @@ -298,7 +298,7 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext { val df = sqlContext.sql( "SELECT * FROM testData2 JOIN testDataForJoin") testSparkPlanMetrics(df, 1, Map( - 1L -> ("CartesianProduct", Map( + 0L -> ("CartesianProduct", Map( "number of output rows" -> 12L))) ) } |