From 0af9ea74a07ecdc08c43fa63cb9c9f0c57e3029b Mon Sep 17 00:00:00 2001 From: Ilya Ganelin Date: Fri, 13 Mar 2015 13:21:04 +0000 Subject: [SPARK-5845][Shuffle] Time to cleanup spilled shuffle files not included in shuffle write time I've added a timer in the right place to fix this inaccuracy. Author: Ilya Ganelin Closes #4965 from ilganeli/SPARK-5845 and squashes the following commits: bfabf88 [Ilya Ganelin] Changed to using a foreach vs. getorelse 3e059b0 [Ilya Ganelin] Switched to using getorelse b946d08 [Ilya Ganelin] Fixed error with option 9434b50 [Ilya Ganelin] Merge remote-tracking branch 'upstream/master' into SPARK-5845 db8647e [Ilya Ganelin] Added update for shuffleWriteTime around spilled file cleanup in ExternalSorter --- .../main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index 27496c5a28..fa2e617762 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -88,7 +88,10 @@ private[spark] class SortShuffleWriter[K, V, C]( } finally { // Clean up our sorter, which may have its own intermediate files if (sorter != null) { + val startTime = System.nanoTime() sorter.stop() + context.taskMetrics.shuffleWriteMetrics.foreach( + _.incShuffleWriteTime(System.nanoTime - startTime)) sorter = null } } -- cgit v1.2.3