[SPARK-7970] Skip closure cleaning for SQL operations

Also introduces new spark private API in RDD.scala with name 'mapPartitionsInternal' which doesn't closure cleans the RDD elements. Author: nitin goyal <nitin.goyal@guavus.com> Author: nitin.goyal <nitin.goyal@guavus.com> Closes #9253 from nitin2goyal/master.
author: nitin goyal <nitin.goyal@guavus.com> 2015-11-13 18:09:08 -0800
committer: Andrew Or <andrew@databricks.com> 2015-11-13 18:09:08 -0800
commit: c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935 (patch)
tree: e95e6afafbe45acab6aa54e1fb87ba6b4aa506e4 /core
parent: bdfbc1dcaf121a1a1239857adcf54cdfe82c26dc (diff)
download: spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.tar.gz
spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.tar.bz2
spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.zip
1 files changed, 18 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 800ef53cbe..2aeb5eeaad 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -706,6 +706,24 @@ abstract class RDD[T: ClassTag](
   }
 
   /**
+   * [performance] Spark's internal mapPartitions method which skips closure cleaning. It is a
+   * performance API to be used carefully only if we are sure that the RDD elements are
+   * serializable and don't require closure cleaning.
+   *
+   * @param preservesPartitioning indicates whether the input function preserves the partitioner,
+   * which should be `false` unless this is a pair RDD and the input function doesn't modify
+   * the keys.
+   */
+  private[spark] def mapPartitionsInternal[U: ClassTag](
+      f: Iterator[T] => Iterator[U],
+      preservesPartitioning: Boolean = false): RDD[U] = withScope {
+    new MapPartitionsRDD(
+      this,
+      (context: TaskContext, index: Int, iter: Iterator[T]) => f(iter),
+      preservesPartitioning)
+  }
+
+  /**
    * Return a new RDD by applying a function to each partition of this RDD, while tracking the index
    * of the original partition.
    *
author	nitin goyal <nitin.goyal@guavus.com>	2015-11-13 18:09:08 -0800
committer	Andrew Or <andrew@databricks.com>	2015-11-13 18:09:08 -0800
commit	c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935 (patch)
tree	e95e6afafbe45acab6aa54e1fb87ba6b4aa506e4 /core
parent	bdfbc1dcaf121a1a1239857adcf54cdfe82c26dc (diff)
download	spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.tar.gz spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.tar.bz2 spark-c939c70ac1ab6a26d9fda0a99c4e837f7e5a7935.zip