aboutsummaryrefslogtreecommitdiff
path: root/core/src/test/scala
diff options
context:
space:
mode:
authorPatrick Wendell <pwendell@gmail.com>2014-04-27 17:40:56 -0700
committerPatrick Wendell <pwendell@gmail.com>2014-04-27 17:40:56 -0700
commit6b3c6e5dd8e74435f71ecdb224db532550ef407b (patch)
tree9ff06e8ce1d8dd01b2ccb5e4820adbdd02310298 /core/src/test/scala
parent3d9fb09681308abd2066d0d02f2438f5a17c9dd9 (diff)
downloadspark-6b3c6e5dd8e74435f71ecdb224db532550ef407b.tar.gz
spark-6b3c6e5dd8e74435f71ecdb224db532550ef407b.tar.bz2
spark-6b3c6e5dd8e74435f71ecdb224db532550ef407b.zip
SPARK-1145: Memory mapping with many small blocks can cause JVM allocation failures
This includes some minor code clean-up as well. The main change is that small files are not memory mapped. There is a nicer way to write that code block using Scala's `Try` but to make it easy to back port and as simple as possible, I opted for the more explicit but less pretty format. Author: Patrick Wendell <pwendell@gmail.com> Closes #43 from pwendell/block-iter-logging and squashes the following commits: 1cff512 [Patrick Wendell] Small issue from merge. 49f6c269 [Patrick Wendell] Merge remote-tracking branch 'apache/master' into block-iter-logging 4943351 [Patrick Wendell] Added a test and feedback on mateis review a637a18 [Patrick Wendell] Review feedback and adding rewind() when reading byte buffers. b76b95f [Patrick Wendell] Review feedback 4e1514e [Patrick Wendell] Don't memory map for small files d238b88 [Patrick Wendell] Some logging and clean-up
Diffstat (limited to 'core/src/test/scala')
-rw-r--r--core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala58
1 files changed, 54 insertions, 4 deletions
diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index 907428db80..00deecc1c3 100644
--- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -17,12 +17,15 @@
package org.apache.spark.storage
-import java.nio.ByteBuffer
+import java.nio.{ByteBuffer, MappedByteBuffer}
+import java.util.Arrays
import akka.actor._
-import org.scalatest.BeforeAndAfter
-import org.scalatest.FunSuite
-import org.scalatest.PrivateMethodTester
+import org.apache.spark.SparkConf
+import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
+import org.apache.spark.util.{AkkaUtils, ByteBufferInputStream, SizeEstimator, Utils}
+import org.mockito.Mockito.{mock, when}
+import org.scalatest.{BeforeAndAfter, FunSuite, PrivateMethodTester}
import org.scalatest.concurrent.Eventually._
import org.scalatest.concurrent.Timeouts._
import org.scalatest.matchers.ShouldMatchers._
@@ -785,6 +788,53 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
}
}
+ test("reads of memory-mapped and non memory-mapped files are equivalent") {
+ val confKey = "spark.storage.memoryMapThreshold"
+
+ // Create a non-trivial (not all zeros) byte array
+ var counter = 0.toByte
+ def incr = {counter = (counter + 1).toByte; counter;}
+ val bytes = Array.fill[Byte](1000)(incr)
+ val byteBuffer = ByteBuffer.wrap(bytes)
+
+ val blockId = BlockId("rdd_1_2")
+
+ // This sequence of mocks makes these tests fairly brittle. It would
+ // be nice to refactor classes involved in disk storage in a way that
+ // allows for easier testing.
+ val blockManager = mock(classOf[BlockManager])
+ val shuffleBlockManager = mock(classOf[ShuffleBlockManager])
+ when(shuffleBlockManager.conf).thenReturn(conf)
+ val diskBlockManager = new DiskBlockManager(shuffleBlockManager,
+ System.getProperty("java.io.tmpdir"))
+
+ when(blockManager.conf).thenReturn(conf.clone.set(confKey, 0.toString))
+ val diskStoreMapped = new DiskStore(blockManager, diskBlockManager)
+ diskStoreMapped.putBytes(blockId, byteBuffer, StorageLevel.DISK_ONLY)
+ val mapped = diskStoreMapped.getBytes(blockId).get
+
+ when(blockManager.conf).thenReturn(conf.clone.set(confKey, (1000 * 1000).toString))
+ val diskStoreNotMapped = new DiskStore(blockManager, diskBlockManager)
+ diskStoreNotMapped.putBytes(blockId, byteBuffer, StorageLevel.DISK_ONLY)
+ val notMapped = diskStoreNotMapped.getBytes(blockId).get
+
+ // Not possible to do isInstanceOf due to visibility of HeapByteBuffer
+ assert(notMapped.getClass.getName.endsWith("HeapByteBuffer"),
+ "Expected HeapByteBuffer for un-mapped read")
+ assert(mapped.isInstanceOf[MappedByteBuffer], "Expected MappedByteBuffer for mapped read")
+
+ def arrayFromByteBuffer(in: ByteBuffer): Array[Byte] = {
+ val array = new Array[Byte](in.remaining())
+ in.get(array)
+ array
+ }
+
+ val mappedAsArray = arrayFromByteBuffer(mapped)
+ val notMappedAsArray = arrayFromByteBuffer(notMapped)
+ assert(Arrays.equals(mappedAsArray, bytes))
+ assert(Arrays.equals(notMappedAsArray, bytes))
+ }
+
test("updated block statuses") {
store = new BlockManager("<driver>", actorSystem, master, serializer, 1200, conf,
securityMgr, mapOutputTracker)