From ea08537143d58b79b3ae5d083e9b3a5647257da8 Mon Sep 17 00:00:00 2001
From: Matei Zaharia <matei@eecs.berkeley.edu>
Date: Mon, 11 Feb 2013 13:23:50 -0800
Subject: Fixed an exponential recursion that could happen with doCheckpoint
 due to lack of memoization

---
 core/src/main/scala/spark/RDD.scala | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'core/src')

diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala
index 6abb5c4792..f6e927a989 100644
--- a/core/src/main/scala/spark/RDD.scala
+++ b/core/src/main/scala/spark/RDD.scala
@@ -636,16 +636,22 @@ abstract class RDD[T: ClassManifest](
   /** The [[spark.SparkContext]] that this RDD was created on. */
   def context = sc
 
+  // Set on first doCheckpoint() call; later calls are no-ops, so shared parents are visited once
+  private var doCheckpointCalled = false
+
   /**
    * Performs the checkpointing of this RDD by saving this. It is called by the DAGScheduler
    * after a job using this RDD has completed (therefore the RDD has been materialized and
    * potentially stored in memory). doCheckpoint() is called recursively on the parent RDDs.
    */
   private[spark] def doCheckpoint() {
-    if (checkpointData.isDefined) {
-      checkpointData.get.doCheckpoint()
-    } else {
-      dependencies.foreach(_.rdd.doCheckpoint())
+    if (!doCheckpointCalled) {
+      doCheckpointCalled = true
+      if (checkpointData.isDefined) {
+        checkpointData.get.doCheckpoint()
+      } else {
+        dependencies.foreach(_.rdd.doCheckpoint())
+      }
     }
   }
 
-- 
cgit v1.2.3