author | Matei Zaharia <matei@eecs.berkeley.edu> | 2012-09-30 21:19:39 -0700
---|---|---
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2012-09-30 21:19:39 -0700
commit | 83143f9a5f92ca5c341332c809f0adf7e58885b6 (patch) |
tree | e6a376187bc640c9917f9c037ce74a2df2942a31 /core/src/test |
parent | fd0374b9de2e32d55fb14c371a98f0f39c30a17a (diff) |
Fixed several bugs that caused weird behavior with files in spark-shell:
- SizeEstimator was following through a ClassLoader field of Hadoop
JobConfs, which referenced the whole interpreter, Scala compiler, etc.
Chaos ensued, giving an estimated size in the tens of gigabytes.
- Broadcast variables in local mode were only stored as MEMORY_ONLY and
never made accessible over a server, so they fell out of the cache when
they were deemed too large and couldn't be reloaded.
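The first fix concerns how SizeEstimator decides which object fields to walk. As a rough illustration only (the class, method, and field names below are invented for the example and this is not the code changed by the commit), the essential idea is to refuse to traverse fields that point at a ClassLoader, since a Hadoop JobConf created inside spark-shell holds such a reference and would otherwise drag the interpreter and Scala compiler into the size estimate:

```scala
import java.lang.reflect.{Field, Modifier}

// Toy stand-in for JobConf: an object that carries a ClassLoader reference
// alongside ordinary data.
class Holder {
  val loader: ClassLoader = getClass.getClassLoader
  val payload: Array[Int] = Array.fill(8)(0)
}

object SizeWalkSketch {
  // Decide whether a reflective size walk should traverse a field.
  def shouldFollow(field: Field): Boolean =
    !Modifier.isStatic(field.getModifiers) &&
      !field.getType.isPrimitive &&
      !classOf[ClassLoader].isAssignableFrom(field.getType) // never walk into class loaders

  def main(args: Array[String]): Unit = {
    val followed = classOf[Holder].getDeclaredFields.filter(shouldFollow).map(_.getName)
    println(followed.mkString(", ")) // prints "payload"; the "loader" field is skipped
  }
}
```

With a guard of this kind, the JobConf's own data is still measured while the class loader reference, and everything reachable through it, is ignored.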
Diffstat (limited to 'core/src/test')
-rw-r--r-- | core/src/test/scala/spark/FileSuite.scala | 16
-rw-r--r-- | core/src/test/scala/spark/RDDSuite.scala | 8
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/core/src/test/scala/spark/FileSuite.scala b/core/src/test/scala/spark/FileSuite.scala
index 17c7a8de43..5c1577ed0b 100644
--- a/core/src/test/scala/spark/FileSuite.scala
+++ b/core/src/test/scala/spark/FileSuite.scala
@@ -1,6 +1,6 @@
 package spark
 
-import java.io.File
+import java.io.{FileWriter, PrintWriter, File}
 
 import scala.io.Source
 
@@ -142,4 +142,18 @@ class FileSuite extends FunSuite with BeforeAndAfter {
       sc.newAPIHadoopFile[IntWritable, Text, SequenceFileInputFormat[IntWritable, Text]](outputDir)
     assert(output.map(_.toString).collect().toList === List("(1,a)", "(2,aa)", "(3,aaa)"))
   }
+
+  test("file caching") {
+    sc = new SparkContext("local", "test")
+    val tempDir = Files.createTempDir()
+    val out = new FileWriter(tempDir + "/input")
+    out.write("Hello world!\n")
+    out.write("What's up?\n")
+    out.write("Goodbye\n")
+    out.close()
+    val rdd = sc.textFile(tempDir + "/input").cache()
+    assert(rdd.count() === 3)
+    assert(rdd.count() === 3)
+    assert(rdd.count() === 3)
+  }
 }
diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala
index a1fe63beaf..ade457c0f9 100644
--- a/core/src/test/scala/spark/RDDSuite.scala
+++ b/core/src/test/scala/spark/RDDSuite.scala
@@ -72,6 +72,14 @@ class RDDSuite extends FunSuite with BeforeAndAfter {
     assert(rdd.collect().toList === List(1, 1, 2, 1, 2, 3, 1, 2, 3, 4))
   }
 
+  test("basic caching") {
+    sc = new SparkContext("local", "test")
+    val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache()
+    assert(rdd.collect().toList === List(1, 2, 3, 4))
+    assert(rdd.collect().toList === List(1, 2, 3, 4))
+    assert(rdd.collect().toList === List(1, 2, 3, 4))
+  }
+
   test("coalesced RDDs") {
     sc = new SparkContext("local", "test")
     val data = sc.parallelize(1 to 10, 10)
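The new tests exercise the caching path: a cached text file and a cached in-memory RDD are each read three times, so the second and third reads must come back from the cache rather than failing to reload. The broadcast half of the fix has no test in this diff; for orientation, a minimal local-mode broadcast usage of the kind the second bullet of the commit message describes looks like the sketch below (the object name and values are made up for illustration, and this code is not part of the commit):

```scala
// Uses the 2012-era `spark` package, as in the tests above.
import spark.SparkContext

object BroadcastSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "broadcast-sketch")
    // Before the fix, a local-mode broadcast stored only as MEMORY_ONLY could be
    // evicted from the cache and then never reloaded, because no server had been
    // set up to serve it back to the tasks that needed it.
    val lookup = sc.broadcast((1 to 1000).map(i => i -> i.toString).toMap)
    val rdd = sc.makeRDD(1 to 10, 2).map(i => lookup.value(i))
    assert(rdd.collect().toList == (1 to 10).map(_.toString).toList)
    sc.stop()
  }
}
```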