aboutsummaryrefslogtreecommitdiff
path: root/core/src/main/scala/org/apache
diff options
context:
space:
mode:
authorEric Liang <ekl@databricks.com>2016-04-19 15:55:21 -0700
committerReynold Xin <rxin@databricks.com>2016-04-19 15:55:21 -0700
commit008a8bbef0d3475610c13fff778a425900912650 (patch)
tree8d6bb2dd8dc29328857d26816ca491f51fc735b4 /core/src/main/scala/org/apache
parentda8859226e09aa6ebcf6a1c5c1369dec3c216eac (diff)
downloadspark-008a8bbef0d3475610c13fff778a425900912650.tar.gz
spark-008a8bbef0d3475610c13fff778a425900912650.tar.bz2
spark-008a8bbef0d3475610c13fff778a425900912650.zip
[SPARK-14733] Allow custom timing control in microbenchmarks
## What changes were proposed in this pull request? The current benchmark framework runs a code block for several iterations and reports statistics. However there is no way to exclude per-iteration setup time from the overall results. This PR adds a timer control object passed into the closure that can be used for this purpose. ## How was this patch tested? Existing benchmark code. Also see https://github.com/apache/spark/pull/12490 Author: Eric Liang <ekl@databricks.com> Closes #12502 from ericl/spark-14733.
Diffstat (limited to 'core/src/main/scala/org/apache')
-rw-r--r--core/src/main/scala/org/apache/spark/util/Benchmark.scala57
1 files changed, 49 insertions, 8 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
index 3718542810..1fc0ad7a4d 100644
--- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala
+++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
@@ -42,7 +42,24 @@ private[spark] class Benchmark(
outputPerIteration: Boolean = false) {
val benchmarks = mutable.ArrayBuffer.empty[Benchmark.Case]
+ /**
+ * Adds a case to run when run() is called. The given function will be run for several
+ * iterations to collect timing statistics.
+ */
def addCase(name: String)(f: Int => Unit): Unit = {
+ addTimerCase(name) { timer =>
+ timer.startTiming()
+ f(timer.iteration)
+ timer.stopTiming()
+ }
+ }
+
+ /**
+ * Adds a case with manual timing control. When the function is run, timing does not start
+ * until timer.startTiming() is called within the given function. The corresponding
+ * timer.stopTiming() method must be called before the function returns.
+ */
+ def addTimerCase(name: String)(f: Benchmark.Timer => Unit): Unit = {
benchmarks += Benchmark.Case(name, f)
}
@@ -84,7 +101,34 @@ private[spark] class Benchmark(
}
private[spark] object Benchmark {
- case class Case(name: String, fn: Int => Unit)
+
+ /**
+ * Object available to benchmark code to control timing e.g. to exclude set-up time.
+ *
+ * @param iteration specifies this is the nth iteration of running the benchmark case
+ */
+ class Timer(val iteration: Int) {
+ private var accumulatedTime: Long = 0L
+ private var timeStart: Long = 0L
+
+ def startTiming(): Unit = {
+ assert(timeStart == 0L, "Already started timing.")
+ timeStart = System.nanoTime
+ }
+
+ def stopTiming(): Unit = {
+ assert(timeStart != 0L, "Have not started timing.")
+ accumulatedTime += System.nanoTime - timeStart
+ timeStart = 0L
+ }
+
+ def totalTime(): Long = {
+ assert(timeStart == 0L, "Have not stopped timing.")
+ accumulatedTime
+ }
+ }
+
+ case class Case(name: String, fn: Timer => Unit)
case class Result(avgMs: Double, bestRate: Double, bestMs: Double)
/**
@@ -123,15 +167,12 @@ private[spark] object Benchmark {
* Runs a single function `f` for iters, returning the average time the function took and
* the rate of the function.
*/
- def measure(num: Long, iters: Int, outputPerIteration: Boolean)(f: Int => Unit): Result = {
+ def measure(num: Long, iters: Int, outputPerIteration: Boolean)(f: Timer => Unit): Result = {
val runTimes = ArrayBuffer[Long]()
for (i <- 0 until iters + 1) {
- val start = System.nanoTime()
-
- f(i)
-
- val end = System.nanoTime()
- val runTime = end - start
+ val timer = new Benchmark.Timer(i)
+ f(timer)
+ val runTime = timer.totalTime()
if (i > 0) {
runTimes += runTime
}