diff options
author | Takuya UESHIN <ueshin@happy-camper.st> | 2016-07-12 17:16:59 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-07-12 17:16:59 +0800 |
commit | 5b28e02584fa4da85214e7da6d77b3b8e189b781 (patch) | |
tree | a89c74943d5b39fc6af322ff2b24b21c7ba75660 /sql/core/src/test | |
parent | fc11c509e234c5414687f7fbd13af113a1f52f10 (diff) | |
download | spark-5b28e02584fa4da85214e7da6d77b3b8e189b781.tar.gz spark-5b28e02584fa4da85214e7da6d77b3b8e189b781.tar.bz2 spark-5b28e02584fa4da85214e7da6d77b3b8e189b781.zip |
[SPARK-16189][SQL] Add ExternalRDD logical plan for input with RDD to have a chance to eliminate serialize/deserialize.
## What changes were proposed in this pull request?
Currently the input `RDD` of a `Dataset` is always serialized to `RDD[InternalRow]` before it becomes a `Dataset`, but there are cases where we call `map` or `mapPartitions` right after the conversion to `Dataset`.
In such cases the data is serialized and then immediately deserialized, which is unnecessary.
This PR adds the `ExternalRDD` logical plan for input that comes from an `RDD`, so that there is a chance to eliminate the redundant serialize/deserialize.
## How was this patch tested?
Existing tests.
Author: Takuya UESHIN <ueshin@happy-camper.st>
Closes #13890 from ueshin/issues/SPARK-16189.
Diffstat (limited to 'sql/core/src/test')
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index ab505139a8..a9d0fcf1b6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.execution.LogicalRDD +import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.datasources.LogicalRelation @@ -242,6 +242,12 @@ abstract class QueryTest extends PlanTest { case _: LogicalRelation => return case p if p.getClass.getSimpleName == "MetastoreRelation" => return case _: MemoryPlan => return + case p: InMemoryRelation => + p.child.transform { + case _: ObjectConsumerExec => return + case _: ObjectProducerExec => return + } + p }.transformAllExpressions { case a: ImperativeAggregate => return case _: TypedAggregateExpression => return |