From 0337d88321f3681009de548ce10ba7e0ca8f1a58 Mon Sep 17 00:00:00 2001 From: Evan Chan Date: Sun, 21 Jul 2013 18:07:19 -0700 Subject: Add a public method getCachedRdds to SparkContext --- core/src/main/scala/spark/SparkContext.scala | 8 +++++++- core/src/test/scala/spark/RDDSuite.scala | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'core') diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index c01e315e35..1b46665d2c 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -546,6 +546,12 @@ class SparkContext( StorageUtils.rddInfoFromStorageStatus(getExecutorStorageStatus, this) } + /** + * Returns an immutable map of RDDs that have marked themselves as cached via cache() call. + * Note that this does not necessarily mean the caching or computation was successful. + */ + def getCachedRDDs: Map[Int, RDD[_]] = persistentRdds.asInstanceOf[Map[Int, RDD[_]]] + def getStageInfo: Map[Stage,StageInfo] = { dagScheduler.stageToInfos } @@ -580,7 +586,7 @@ class SparkContext( case null | "file" => if (SparkHadoopUtil.isYarnMode()) { logWarning("local jar specified as parameter to addJar under Yarn mode") - return + return } env.httpFileServer.addJar(new File(uri.getPath)) case _ => path diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index cbddf4e523..ff2dcd72d8 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -90,15 +90,19 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("basic caching") { + val origCachedRdds = sc.getCachedRDDs.size val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() assert(rdd.collect().toList === List(1, 2, 3, 4)) assert(rdd.collect().toList === List(1, 2, 3, 4)) assert(rdd.collect().toList === List(1, 2, 3, 4)) + // Should only result in one cached RDD + assert(sc.getCachedRDDs.size === origCachedRdds + 1) } test("caching with failures") { val onlySplit = new Partition { override def index: Int = 0 } var shouldFail = true + val origCachedRdds = sc.getCachedRDDs.size val rdd = new RDD[Int](sc, Nil) { override def getPartitions: Array[Partition] = Array(onlySplit) override val getDependencies = List[Dependency[_]]() @@ -110,12 +114,14 @@ class RDDSuite extends FunSuite with SharedSparkContext { } } }.cache() + assert(sc.getCachedRDDs.size === origCachedRdds + 1) val thrown = intercept[Exception]{ rdd.collect() } assert(thrown.getMessage.contains("injected failure")) shouldFail = false assert(rdd.collect().toList === List(1, 2, 3, 4)) + assert(sc.getCachedRDDs.size === origCachedRdds + 1) } test("empty RDD") { -- cgit v1.2.3 From 2c2bfbe294c0082520c80a01562a2dbeeba63b7a Mon Sep 17 00:00:00 2001 From: Evan Chan Date: Tue, 23 Jul 2013 01:36:44 -0700 Subject: Add toMap method to TimeStampedHashMap and use it --- core/src/main/scala/spark/SparkContext.scala | 2 +- core/src/main/scala/spark/util/TimeStampedHashMap.scala | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'core') diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 1b46665d2c..0fb7dfa810 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -550,7 +550,7 @@ class SparkContext( * Returns an immutable map of RDDs that have marked themselves as cached via cache() call. 
* Note that this does not necessarily mean the caching or computation was successful. */ - def getCachedRDDs: Map[Int, RDD[_]] = persistentRdds.asInstanceOf[Map[Int, RDD[_]]] + def getCachedRDDs: Map[Int, RDD[_]] = persistentRdds.toMap def getStageInfo: Map[Stage,StageInfo] = { dagScheduler.stageToInfos diff --git a/core/src/main/scala/spark/util/TimeStampedHashMap.scala b/core/src/main/scala/spark/util/TimeStampedHashMap.scala index cc7909194a..07772a0afb 100644 --- a/core/src/main/scala/spark/util/TimeStampedHashMap.scala +++ b/core/src/main/scala/spark/util/TimeStampedHashMap.scala @@ -20,6 +20,7 @@ package spark.util import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConversions import scala.collection.mutable.Map +import scala.collection.immutable import spark.scheduler.MapStatus /** @@ -99,6 +100,8 @@ class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging { } } + def toMap: immutable.Map[A, B] = iterator.toMap + /** * Removes old key-value pairs that have timestamp earlier than `threshTime` */ -- cgit v1.2.3 From 4830e225624091fa836012651420cf2b5b97dcca Mon Sep 17 00:00:00 2001 From: Evan Chan Date: Tue, 23 Jul 2013 09:50:13 -0700 Subject: Rename method per rxin feedback --- core/src/main/scala/spark/SparkContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'core') diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index 0fb7dfa810..24ba605646 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -547,10 +547,10 @@ class SparkContext( } /** - * Returns an immutable map of RDDs that have marked themselves as cached via cache() call. + * Returns an immutable map of RDDs that have marked themselves as persistent via cache() call. * Note that this does not necessarily mean the caching or computation was successful. 
*/ - def getCachedRDDs: Map[Int, RDD[_]] = persistentRdds.toMap + def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap def getStageInfo: Map[Stage,StageInfo] = { dagScheduler.stageToInfos -- cgit v1.2.3 From efd6418c1b99c1ecc2b0a4c72e6430eea4d86260 Mon Sep 17 00:00:00 2001 From: Evan Chan Date: Tue, 23 Jul 2013 10:40:41 -0700 Subject: Move getPersistentRDDs testing to a new Suite --- core/src/test/scala/spark/RDDSuite.scala | 6 --- .../test/scala/spark/SparkContextInfoSuite.scala | 60 ++++++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 core/src/test/scala/spark/SparkContextInfoSuite.scala (limited to 'core') diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index ff2dcd72d8..cbddf4e523 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -90,19 +90,15 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("basic caching") { - val origCachedRdds = sc.getCachedRDDs.size val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() assert(rdd.collect().toList === List(1, 2, 3, 4)) assert(rdd.collect().toList === List(1, 2, 3, 4)) assert(rdd.collect().toList === List(1, 2, 3, 4)) - // Should only result in one cached RDD - assert(sc.getCachedRDDs.size === origCachedRdds + 1) } test("caching with failures") { val onlySplit = new Partition { override def index: Int = 0 } var shouldFail = true - val origCachedRdds = sc.getCachedRDDs.size val rdd = new RDD[Int](sc, Nil) { override def getPartitions: Array[Partition] = Array(onlySplit) override val getDependencies = List[Dependency[_]]() @@ -114,14 +110,12 @@ class RDDSuite extends FunSuite with SharedSparkContext { } } }.cache() - assert(sc.getCachedRDDs.size === origCachedRdds + 1) val thrown = intercept[Exception]{ rdd.collect() } assert(thrown.getMessage.contains("injected failure")) shouldFail = false assert(rdd.collect().toList === List(1, 2, 3, 4)) - assert(sc.getCachedRDDs.size === origCachedRdds + 1) } test("empty RDD") { diff --git a/core/src/test/scala/spark/SparkContextInfoSuite.scala b/core/src/test/scala/spark/SparkContextInfoSuite.scala new file mode 100644 index 0000000000..6d50bf5e1b --- /dev/null +++ b/core/src/test/scala/spark/SparkContextInfoSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package spark + +import org.scalatest.FunSuite +import spark.SparkContext._ + +class SparkContextInfoSuite extends FunSuite with LocalSparkContext { + test("getPersistentRDDs only returns RDDs that are marked as cached") { + sc = new SparkContext("local", "test") + assert(sc.getPersistentRDDs.isEmpty === true) + + val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2) + assert(sc.getPersistentRDDs.isEmpty === true) + + rdd.cache() + assert(sc.getPersistentRDDs.size === 1) + assert(sc.getPersistentRDDs.values.head === rdd) + } + + test("getPersistentRDDs returns an immutable map") { + sc = new SparkContext("local", "test") + val rdd1 = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() + + val myRdds = sc.getPersistentRDDs + assert(myRdds.size === 1) + assert(myRdds.values.head === rdd1) + + val rdd2 = sc.makeRDD(Array(5, 6, 7, 8), 1).cache() + + // getPersistentRDDs should have 2 RDDs, but myRdds should not change + assert(sc.getPersistentRDDs.size === 2) + assert(myRdds.size === 1) + } + + test("getRDDStorageInfo only reports on RDDs that actually persist data") { + sc = new SparkContext("local", "test") + val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() + + assert(sc.getRDDStorageInfo.size === 0) + + rdd.collect() + assert(sc.getRDDStorageInfo.size === 1) + } +} \ No newline at end of file -- cgit v1.2.3