aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorAndrew Or <andrewor14@gmail.com>2014-07-23 10:31:45 -0700
committerMatei Zaharia <matei@databricks.com>2014-07-23 10:31:45 -0700
commit25921110fcd5afe568bf0d25fccd232787af7911 (patch)
tree3bbdd905d4ccae9639d155d9630549d7493b2e0b /core
parent4c7243e109c713bdfb87891748800109ffbaae07 (diff)
downloadspark-25921110fcd5afe568bf0d25fccd232787af7911.tar.gz
spark-25921110fcd5afe568bf0d25fccd232787af7911.tar.bz2
spark-25921110fcd5afe568bf0d25fccd232787af7911.zip
[SPARK-2609] Log thread ID when spilling ExternalAppendOnlyMap
It's useful to know whether one thread is constantly spilling or multiple threads are spilling relatively infrequently. Right now everything looks a little jumbled and we can't tell which lines belong to the same thread. For instance: ``` 06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (194 times so far) 06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (198 times so far) 06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (198 times so far) 06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 10 MB to disk (197 times so far) 06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 9 MB to disk (45 times so far) 06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 23 MB to disk (198 times so far) 06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 38 MB to disk (25 times so far) 06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 161 MB to disk (25 times so far) 06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 0 MB to disk (199 times so far) 06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (166 times so far) 06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (199 times so far) 06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (200 times so far) ``` Author: Andrew Or <andrewor14@gmail.com> Closes #1517 from andrewor14/external-log and squashes the following commits: 90e48bb [Andrew Or] Log thread ID when spilling
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala6
1 files changed, 3 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index 71ab2a3e3b..be8f6529f7 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -106,6 +106,7 @@ class ExternalAppendOnlyMap[K, V, C](
private val fileBufferSize = sparkConf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
private val keyComparator = new HashComparator[K]
private val ser = serializer.newInstance()
+ private val threadId = Thread.currentThread().getId
/**
* Insert the given key and value into the map.
@@ -128,7 +129,6 @@ class ExternalAppendOnlyMap[K, V, C](
// Atomically check whether there is sufficient memory in the global pool for
// this map to grow and, if possible, allocate the required amount
shuffleMemoryMap.synchronized {
- val threadId = Thread.currentThread().getId
val previouslyOccupiedMemory = shuffleMemoryMap.get(threadId)
val availableMemory = maxMemoryThreshold -
(shuffleMemoryMap.values.sum - previouslyOccupiedMemory.getOrElse(0L))
@@ -153,8 +153,8 @@ class ExternalAppendOnlyMap[K, V, C](
*/
private def spill(mapSize: Long) {
spillCount += 1
- logWarning("Spilling in-memory map of %d MB to disk (%d time%s so far)"
- .format(mapSize / (1024 * 1024), spillCount, if (spillCount > 1) "s" else ""))
+ logWarning("Thread %d spilling in-memory map of %d MB to disk (%d time%s so far)"
+ .format(threadId, mapSize / (1024 * 1024), spillCount, if (spillCount > 1) "s" else ""))
val (blockId, file) = diskBlockManager.createTempBlock()
var writer = blockManager.getDiskWriter(blockId, file, serializer, fileBufferSize)
var objectsWritten = 0