aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Ganelin <ilya.ganelin@capitalone.com>2015-03-13 13:21:04 +0000
committerSean Owen <sowen@cloudera.com>2015-03-13 13:21:04 +0000
commit0af9ea74a07ecdc08c43fa63cb9c9f0c57e3029b (patch)
treebdee712a5d184d255dd33b3723a628ca74c43b13
parent3980ebdf188d77799b55b407b115cdc82f51d532 (diff)
downloadspark-0af9ea74a07ecdc08c43fa63cb9c9f0c57e3029b.tar.gz
spark-0af9ea74a07ecdc08c43fa63cb9c9f0c57e3029b.tar.bz2
spark-0af9ea74a07ecdc08c43fa63cb9c9f0c57e3029b.zip
[SPARK-5845][Shuffle] Time to cleanup spilled shuffle files not included in shuffle write time
I've added a timer in the right place to fix this inaccuracy. Author: Ilya Ganelin <ilya.ganelin@capitalone.com> Closes #4965 from ilganeli/SPARK-5845 and squashes the following commits: bfabf88 [Ilya Ganelin] Changed to using a foreach vs. getorelse 3e059b0 [Ilya Ganelin] Switched to using getorelse b946d08 [Ilya Ganelin] Fixed error with option 9434b50 [Ilya Ganelin] Merge remote-tracking branch 'upstream/master' into SPARK-5845 db8647e [Ilya Ganelin] Added update for shuffleWriteTime around spilled file cleanup in ExternalSorter
-rw-r--r--core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala3
1 files changed, 3 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
index 27496c5a28..fa2e617762 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
@@ -88,7 +88,10 @@ private[spark] class SortShuffleWriter[K, V, C](
} finally {
// Clean up our sorter, which may have its own intermediate files
if (sorter != null) {
+ val startTime = System.nanoTime()
sorter.stop()
+ context.taskMetrics.shuffleWriteMetrics.foreach(
+ _.incShuffleWriteTime(System.nanoTime - startTime))
sorter = null
}
}