From 8f8d8a2315514cd1f3609bc06e5cf6e6d06fdd91 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 2 Mar 2016 09:59:22 -0800 Subject: [SPARK-13609] [SQL] Support Column Pruning for MapPartitions #### What changes were proposed in this pull request? This PR is to prune unnecessary columns when the operator is `MapPartitions`. The solution is to add an extra `Project` in the child node. For the other two operators `AppendColumns` and `MapGroups`, it sounds doable. More discussions are required. The major reason is the current implementation of the `inputPlan` of `groupBy` is based on the child of `AppendColumns`. It might be a bug? Thus, will submit a separate PR. #### How was this patch tested? Added a test case in ColumnPruningSuite to verify the rule. Added another test case in DatasetSuite.scala to verify the data. Author: gatorsmile Closes #11460 from gatorsmile/datasetPruningNew. --- .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'sql/core/src') diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 33df6375e3..79e10215f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -113,7 +113,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext { ("a", 2), ("b", 3), ("c", 4)) } - test("map with type change") { + test("map with type change with the exact matched number of attributes") { val ds = Seq(("a", 1), ("b", 2), ("c", 3)).toDS() checkAnswer( @@ -123,6 +123,15 @@ class DatasetSuite extends QueryTest with SharedSQLContext { OtherTuple("a", 1), OtherTuple("b", 2), OtherTuple("c", 3)) } + test("map with type change with less attributes") { + val ds = Seq(("a", 1, 3), ("b", 2, 4), ("c", 3, 5)).toDS() + + checkAnswer( + ds.as[OtherTuple] + .map(identity[OtherTuple]), + OtherTuple("a", 1), OtherTuple("b", 2), OtherTuple("c", 3)) + } + test("map and group by with class data") { // We inject a group by here to make sure this test case is future proof // when we implement better pipelining and local execution mode. -- cgit v1.2.3