aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorGuillaume Poulin <poulin.guillaume@gmail.com>2016-01-06 21:34:46 -0800
committerReynold Xin <rxin@databricks.com>2016-01-06 21:34:46 -0800
commitb6738520374637347ab5ae6c801730cdb6b35daa (patch)
tree53b917c78d5b15aee5221d52b67b04e7b132148f /core
parent174e72ceca41a6ac17ad05d50832ee9c561918c0 (diff)
downloadspark-b6738520374637347ab5ae6c801730cdb6b35daa.tar.gz
spark-b6738520374637347ab5ae6c801730cdb6b35daa.tar.bz2
spark-b6738520374637347ab5ae6c801730cdb6b35daa.zip
[SPARK-12678][CORE] MapPartitionsRDD clearDependencies
MapPartitionsRDD was keeping a reference to `prev` after a call to `clearDependencies` which could lead to memory leak. Author: Guillaume Poulin <poulin.guillaume@gmail.com> Closes #10623 from gpoulin/map_partition_deps.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala7
1 files changed, 6 insertions, 1 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
index 4312d3a417..e4587c96ea 100644
--- a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
@@ -25,7 +25,7 @@ import org.apache.spark.{Partition, TaskContext}
* An RDD that applies the provided function to every partition of the parent RDD.
*/
private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
- prev: RDD[T],
+ var prev: RDD[T],
f: (TaskContext, Int, Iterator[T]) => Iterator[U], // (TaskContext, partition index, iterator)
preservesPartitioning: Boolean = false)
extends RDD[U](prev) {
@@ -36,4 +36,9 @@ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
override def compute(split: Partition, context: TaskContext): Iterator[U] =
f(context, split.index, firstParent[T].iterator(split, context))
+
+ override def clearDependencies() {
+ super.clearDependencies()
+ prev = null
+ }
}