author | Matei Zaharia <matei@eecs.berkeley.edu> | 2012-09-30 21:19:39 -0700
---|---|---
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2012-09-30 21:19:39 -0700
commit | 83143f9a5f92ca5c341332c809f0adf7e58885b6 (patch) |
tree | e6a376187bc640c9917f9c037ce74a2df2942a31 /core/src/test |
parent | fd0374b9de2e32d55fb14c371a98f0f39c30a17a (diff) |
Fixed several bugs that caused weird behavior with files in spark-shell:
- SizeEstimator was following through a ClassLoader field of Hadoop
JobConfs, which referenced the whole interpreter, Scala compiler, etc.
Chaos ensued, giving an estimated size in the tens of gigabytes.
- Broadcast variables in local mode were only stored as MEMORY_ONLY and
never made accessible over a server, so they fell out of the cache when
they were deemed too large and couldn't be reloaded.
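The first fix concerns how SizeEstimator decides which object fields to walk. As a rough illustration only (the class, method, and field names below are invented for the example and this is not the code changed by the commit), the essential idea is to refuse to traverse fields that point at a ClassLoader, since a Hadoop JobConf created inside spark-shell holds such a reference and would otherwise drag the interpreter and Scala compiler into the size estimate:

```scala
import java.lang.reflect.{Field, Modifier}

// Toy stand-in for JobConf: an object that carries a ClassLoader reference
// alongside ordinary data.
class Holder {
  val loader: ClassLoader = getClass.getClassLoader
  val payload: Array[Int] = Array.fill(8)(0)
}

object SizeWalkSketch {
  // Decide whether a reflective size walk should traverse a field.
  def shouldFollow(field: Field): Boolean =
    !Modifier.isStatic(field.getModifiers) &&
      !field.getType.isPrimitive &&
      !classOf[ClassLoader].isAssignableFrom(field.getType) // never walk into class loaders

  def main(args: Array[String]): Unit = {
    val followed = classOf[Holder].getDeclaredFields.filter(shouldFollow).map(_.getName)
    println(followed.mkString(", ")) // prints "payload"; the "loader" field is skipped
  }
}
```

With a guard of this kind, the JobConf's own data is still measured while the class loader reference, and everything reachable through it, is ignored.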
Diffstat (limited to 'core/src/test')
-rw-r--r-- | core/src/test/scala/spark/FileSuite.scala | 16
-rw-r--r-- | core/src/test/scala/spark/RDDSuite.scala | 8
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/core/src/test/scala/spark/FileSuite.scala b/core/src/test/scala/spark/FileSuite.scala
index 17c7a8de43..5c1577ed0b 100644
--- a/core/src/test/scala/spark/FileSuite.scala
+++ b/core/src/test/scala/spark/FileSuite.scala
@@ -1,6 +1,6 @@
 package spark
 
-import java.io.File
+import java.io.{FileWriter, PrintWriter, File}
 
 import scala.io.Source
 
@@ -142,4 +142,18 @@ class FileSuite extends FunSuite with BeforeAndAfter {
       sc.newAPIHadoopFile[IntWritable, Text, SequenceFileInputFormat[IntWritable, Text]](outputDir)
     assert(output.map(_.toString).collect().toList === List("(1,a)", "(2,aa)", "(3,aaa)"))
   }
+
+  test("file caching") {
+    sc = new SparkContext("local", "test")
+    val tempDir = Files.createTempDir()
+    val out = new FileWriter(tempDir + "/input")
+    out.write("Hello world!\n")
+    out.write("What's up?\n")
+    out.write("Goodbye\n")
+    out.close()
+    val rdd = sc.textFile(tempDir + "/input").cache()
+    assert(rdd.count() === 3)
+    assert(rdd.count() === 3)
+    assert(rdd.count() === 3)
+  }
 }
diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala
index a1fe63beaf..ade457c0f9 100644
--- a/core/src/test/scala/spark/RDDSuite.scala
+++ b/core/src/test/scala/spark/RDDSuite.scala
@@ -72,6 +72,14 @@ class RDDSuite extends FunSuite with BeforeAndAfter {
     assert(rdd.collect().toList === List(1, 1, 2, 1, 2, 3, 1, 2, 3, 4))
   }
 
+  test("basic caching") {
+    sc = new SparkContext("local", "test")
+    val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache()
+    assert(rdd.collect().toList === List(1, 2, 3, 4))
+    assert(rdd.collect().toList === List(1, 2, 3, 4))
+    assert(rdd.collect().toList === List(1, 2, 3, 4))
+  }
+
   test("coalesced RDDs") {
     sc = new SparkContext("local", "test")
     val data = sc.parallelize(1 to 10, 10)
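The new tests exercise the caching path: a cached text file and a cached in-memory RDD are each read three times, so the second and third reads must come back from the cache rather than failing to reload. The broadcast half of the fix has no test in this diff; for orientation, a minimal local-mode broadcast usage of the kind the second bullet of the commit message describes looks like the sketch below (the object name and values are made up for illustration, and this code is not part of the commit):

```scala
// Uses the 2012-era `spark` package, as in the tests above.
import spark.SparkContext

object BroadcastSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "broadcast-sketch")
    // Before the fix, a local-mode broadcast stored only as MEMORY_ONLY could be
    // evicted from the cache and then never reloaded, because no server had been
    // set up to serve it back to the tasks that needed it.
    val lookup = sc.broadcast((1 to 1000).map(i => i -> i.toString).toMap)
    val rdd = sc.makeRDD(1 to 10, 2).map(i => lookup.value(i))
    assert(rdd.collect().toList == (1 to 10).map(_.toString).toList)
    sc.stop()
  }
}
```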