From ea08537143d58b79b3ae5d083e9b3a5647257da8 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 11 Feb 2013 13:23:50 -0800 Subject: Fixed an exponential recursion that could happen with doCheckpoint due to lack of memoization --- core/src/main/scala/spark/RDD.scala | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'core/src') diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 6abb5c4792..f6e927a989 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -636,16 +636,22 @@ abstract class RDD[T: ClassManifest]( /** The [[spark.SparkContext]] that this RDD was created on. */ def context = sc + // Avoid handling doCheckpoint multiple times to prevent excessive recursion + private var doCheckpointCalled = false + /** * Performs the checkpointing of this RDD by saving this. It is called by the DAGScheduler * after a job using this RDD has completed (therefore the RDD has been materialized and * potentially stored in memory). doCheckpoint() is called recursively on the parent RDDs. */ private[spark] def doCheckpoint() { - if (checkpointData.isDefined) { - checkpointData.get.doCheckpoint() - } else { - dependencies.foreach(_.rdd.doCheckpoint()) + if (!doCheckpointCalled) { + doCheckpointCalled = true + if (checkpointData.isDefined) { + checkpointData.get.doCheckpoint() + } else { + dependencies.foreach(_.rdd.doCheckpoint()) + } } } -- cgit v1.2.3