aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authornitin goyal <nitin.goyal@guavus.com>2015-11-13 18:09:08 -0800
committerAndrew Or <andrew@databricks.com>2015-11-13 18:09:08 -0800
commitc939c70ac1ab6a26d9fda0a99c4e837f7e5a7935 (patch)
treee95e6afafbe45acab6aa54e1fb87ba6b4aa506e4 /core
parentbdfbc1dcaf121a1a1239857adcf54cdfe82c26dc (diff)
downloadspark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.tar.gz
spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.tar.bz2
spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.zip
[SPARK-7970] Skip closure cleaning for SQL operations
Also introduces new spark private API in RDD.scala with name 'mapPartitionsInternal' which doesn't closure cleans the RDD elements. Author: nitin goyal <nitin.goyal@guavus.com> Author: nitin.goyal <nitin.goyal@guavus.com> Closes #9253 from nitin2goyal/master.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDD.scala18
1 files changed, 18 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 800ef53cbe..2aeb5eeaad 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -706,6 +706,24 @@ abstract class RDD[T: ClassTag](
}
/**
+ * [performance] Spark's internal mapPartitions method which skips closure cleaning. It is a
+ * performance API to be used carefully only if we are sure that the RDD elements are
+ * serializable and don't require closure cleaning.
+ *
+ * @param preservesPartitioning indicates whether the input function preserves the partitioner,
+ * which should be `false` unless this is a pair RDD and the input function doesn't modify
+ * the keys.
+ */
+ private[spark] def mapPartitionsInternal[U: ClassTag](
+ f: Iterator[T] => Iterator[U],
+ preservesPartitioning: Boolean = false): RDD[U] = withScope {
+ new MapPartitionsRDD(
+ this,
+ (context: TaskContext, index: Int, iter: Iterator[T]) => f(iter),
+ preservesPartitioning)
+ }
+
+ /**
* Return a new RDD by applying a function to each partition of this RDD, while tracking the index
* of the original partition.
*