From 008a8bbef0d3475610c13fff778a425900912650 Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Tue, 19 Apr 2016 15:55:21 -0700
Subject: [SPARK-14733] Allow custom timing control in microbenchmarks

## What changes were proposed in this pull request?

The current benchmark framework runs a code block for several iterations and
reports statistics. However there is no way to exclude per-iteration setup time
from the overall results.

This PR adds a timer control object passed into the closure that can be used
for this purpose.

## How was this patch tested?

Existing benchmark code. Also see https://github.com/apache/spark/pull/12490

Author: Eric Liang

Closes #12502 from ericl/spark-14733.
---
 .../scala/org/apache/spark/util/Benchmark.scala    | 57 +++++++++++++++++++---
 1 file changed, 49 insertions(+), 8 deletions(-)

(limited to 'core/src/main/scala/org/apache')

diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
index 3718542810..1fc0ad7a4d 100644
--- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala
+++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
@@ -42,7 +42,24 @@ private[spark] class Benchmark(
     outputPerIteration: Boolean = false) {
   val benchmarks = mutable.ArrayBuffer.empty[Benchmark.Case]
 
+  /**
+   * Adds a case to run when run() is called. The given function will be run for several
+   * iterations to collect timing statistics.
+   */
   def addCase(name: String)(f: Int => Unit): Unit = {
+    addTimerCase(name) { timer =>
+      timer.startTiming()
+      f(timer.iteration)
+      timer.stopTiming()
+    }
+  }
+
+  /**
+   * Adds a case with manual timing control. When the function is run, timing does not start
+   * until timer.startTiming() is called within the given function. The corresponding
+   * timer.stopTiming() method must be called before the function returns.
+   */
+  def addTimerCase(name: String)(f: Benchmark.Timer => Unit): Unit = {
     benchmarks += Benchmark.Case(name, f)
   }
 
@@ -84,7 +101,34 @@ private[spark] class Benchmark(
 }
 
 private[spark] object Benchmark {
-  case class Case(name: String, fn: Int => Unit)
+
+  /**
+   * Object available to benchmark code to control timing e.g. to exclude set-up time.
+   *
+   * @param iteration specifies this is the nth iteration of running the benchmark case
+   */
+  class Timer(val iteration: Int) {
+    private var accumulatedTime: Long = 0L
+    private var timeStart: Long = 0L
+
+    def startTiming(): Unit = {
+      assert(timeStart == 0L, "Already started timing.")
+      timeStart = System.nanoTime
+    }
+
+    def stopTiming(): Unit = {
+      assert(timeStart != 0L, "Have not started timing.")
+      accumulatedTime += System.nanoTime - timeStart
+      timeStart = 0L
+    }
+
+    def totalTime(): Long = {
+      assert(timeStart == 0L, "Have not stopped timing.")
+      accumulatedTime
+    }
+  }
+
+  case class Case(name: String, fn: Timer => Unit)
   case class Result(avgMs: Double, bestRate: Double, bestMs: Double)
 
   /**
@@ -123,15 +167,12 @@ private[spark] object Benchmark {
    * Runs a single function `f` for iters, returning the average time the function took and
    * the rate of the function.
    */
-  def measure(num: Long, iters: Int, outputPerIteration: Boolean)(f: Int => Unit): Result = {
+  def measure(num: Long, iters: Int, outputPerIteration: Boolean)(f: Timer => Unit): Result = {
     val runTimes = ArrayBuffer[Long]()
     for (i <- 0 until iters + 1) {
-      val start = System.nanoTime()
-
-      f(i)
-
-      val end = System.nanoTime()
-      val runTime = end - start
+      val timer = new Benchmark.Timer(i)
+      f(timer)
+      val runTime = timer.totalTime()
       if (i > 0) {
         runTimes += runTime
       }
--
cgit v1.2.3
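
For readers of this patch, here is a minimal usage sketch of the new `addTimerCase` API, showing how per-iteration setup can be excluded from the reported time. Only `addTimerCase`, `addCase`, the `Timer` methods, and `run()` are taken from the diff above; the `Benchmark` constructor arguments (benchmark name and values-per-iteration count), the case names, and the workload are illustrative assumptions.

```scala
// Hypothetical usage sketch (not part of this patch): exclude per-iteration
// setup cost from the reported timings using the new Timer control object.
// The Benchmark constructor arguments below are assumed, not taken from this diff.
val N = 1000000L
val benchmark = new Benchmark("Array sum with setup excluded", N)

benchmark.addTimerCase("sum after untimed setup") { timer =>
  // Untimed setup: building the input array is excluded from the measurement.
  val data = Array.tabulate(N.toInt)(_.toLong)

  timer.startTiming()  // measured region begins here
  var i = 0
  var sum = 0L
  while (i < data.length) {
    sum += data(i)
    i += 1
  }
  timer.stopTiming()   // measured region ends before the closure returns
}

// The pre-existing addCase API still works; it now wraps addTimerCase and
// times the whole closure body, setup included.
benchmark.addCase("sum including setup") { _ =>
  val data = Array.tabulate(N.toInt)(_.toLong)
  data.sum
}

benchmark.run()
```

Because `addCase` is now implemented on top of `addTimerCase` and times the entire closure, existing benchmark cases keep their previous behavior.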