author Matei Zaharia <matei@eecs.berkeley.edu> 2012-10-12 14:58:26 -0700
committer Matei Zaharia <matei@eecs.berkeley.edu> 2012-10-12 14:58:26 -0700
commit 682b2d9329ebfd82f5231acd940f9dbd2037d2ae (patch)
tree 0c99a10f6c8e9d6dbd184d2ec46e9e89d1016957
parent dca496bb77e9391c40cfdbc61ae2512d19e7b766 (diff)
Added a test for when an RDD only partially fits in memory
-rw-r--r-- core/src/test/scala/spark/DistributedSuite.scala | 20
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/core/src/test/scala/spark/DistributedSuite.scala b/core/src/test/scala/spark/DistributedSuite.scala
index 76b0884481..2ea38d4a5d 100644
--- a/core/src/test/scala/spark/DistributedSuite.scala
+++ b/core/src/test/scala/spark/DistributedSuite.scala
@@ -158,12 +158,28 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter
     assert(data.count() === 1000)
   }
 
-  test("compute without caching with low memory") {
+  test("compute without caching when no partitions fit in memory") {
     System.setProperty("spark.storage.memoryFraction", "0.0001")
     sc = new SparkContext(clusterUrl, "test")
-    val data = sc.parallelize(1 to 4000000, 2).persist(StorageLevel.MEMORY_ONLY)
+    // data will be 4 million * 4 bytes = 16 MB in size, but our memoryFraction sets the cache
+    // to only 50 KB (0.0001 of 512 MB), so no partitions should fit in memory
+    val data = sc.parallelize(1 to 4000000, 2).persist(StorageLevel.MEMORY_ONLY_SER)
     assert(data.count() === 4000000)
     assert(data.count() === 4000000)
     assert(data.count() === 4000000)
+    System.clearProperty("spark.storage.memoryFraction")
+  }
+
+  test("compute when only some partitions fit in memory") {
+    System.setProperty("spark.storage.memoryFraction", "0.01")
+    sc = new SparkContext(clusterUrl, "test")
+    // data will be 4 million * 4 bytes = 16 MB in size, but our memoryFraction sets the cache
+    // to only 5 MB (0.01 of 512 MB), so not all of it will fit in memory; we use 20 partitions
+    // to make sure that *some* of them do fit though
+    val data = sc.parallelize(1 to 4000000, 20).persist(StorageLevel.MEMORY_ONLY_SER)
+    assert(data.count() === 4000000)
+    assert(data.count() === 4000000)
+    assert(data.count() === 4000000)
+    System.clearProperty("spark.storage.memoryFraction")
   }
 }
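
The two tests hinge on simple sizing arithmetic: spark.storage.memoryFraction times the executor heap gives the cache budget, and the patch switches to StorageLevel.MEMORY_ONLY_SER so cached partitions are stored as serialized byte arrays. A minimal standalone sketch of that arithmetic, not part of the patch, assuming the 512 MB heap and 4 bytes per Int that the in-code comments assume (object and variable names here are hypothetical):

// Sketch of the sizing arithmetic behind the two tests above.
object CacheSizingSketch extends App {
  val heapBytes    = 512L * 1024 * 1024   // assumed executor heap: 512 MB
  val datasetBytes = 4000000L * 4         // 4 million ints * 4 bytes ~= 16 MB

  // (memoryFraction, numPartitions) for each of the two tests
  for ((fraction, numPartitions) <- Seq(0.0001 -> 2, 0.01 -> 20)) {
    val cacheBytes     = (heapBytes * fraction).toLong     // cache budget
    val partitionBytes = datasetBytes / numPartitions      // size of one partition
    val fitting        = cacheBytes / partitionBytes       // partitions that fit
    println(s"fraction=$fraction cache=${cacheBytes / 1024} KB " +
            s"partition=${partitionBytes / 1024} KB partitionsThatFit=$fitting")
  }
}

Run as-is, this prints a ~52 KB cache against ~7.8 MB partitions for the first test (nothing fits, so every count() recomputes) and a ~5.2 MB cache against ~780 KB partitions for the second (a handful fit), which is exactly the regime each test targets.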