From 695f0e9195209c75bfc62fc70bfc6d7d9f1047b3 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 3 May 2016 22:56:40 -0700 Subject: [SPARK-15107][SQL] Allow varying # iterations by test case in Benchmark ## What changes were proposed in this pull request? This patch changes our micro-benchmark util to allow setting different iteration numbers for different test cases. For some of our benchmarks, turning off whole-stage codegen can make the runtime 20X slower, making it very difficult to run a large number of times without substantially shortening the input cardinality. With this change, I set the default num iterations to 2 for whole stage codegen off, and 5 for whole stage codegen on. I also updated some results. ## How was this patch tested? N/A - this is a test util. Author: Reynold Xin Closes #12884 from rxin/SPARK-15107. --- .../scala/org/apache/spark/util/Benchmark.scala | 21 ++++++++++----------- .../collection/unsafe/sort/RadixSortSuite.scala | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'core/src') diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala index 1fc0ad7a4d..0c685b1918 100644 --- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala @@ -38,7 +38,7 @@ import org.apache.commons.lang3.SystemUtils private[spark] class Benchmark( name: String, valuesPerIteration: Long, - iters: Int = 5, + defaultNumIters: Int = 5, outputPerIteration: Boolean = false) { val benchmarks = mutable.ArrayBuffer.empty[Benchmark.Case] @@ -46,8 +46,8 @@ private[spark] class Benchmark( * Adds a case to run when run() is called. The given function will be run for several * iterations to collect timing statistics. */ - def addCase(name: String)(f: Int => Unit): Unit = { - addTimerCase(name) { timer => + def addCase(name: String, numIters: Int = 0)(f: Int => Unit): Unit = { + addTimerCase(name, numIters) { timer => timer.startTiming() f(timer.iteration) timer.stopTiming() @@ -59,8 +59,8 @@ private[spark] class Benchmark( * until timer.startTiming() is called within the given function. The corresponding * timer.stopTiming() method must be called before the function returns. */ - def addTimerCase(name: String)(f: Benchmark.Timer => Unit): Unit = { - benchmarks += Benchmark.Case(name, f) + def addTimerCase(name: String, numIters: Int = 0)(f: Benchmark.Timer => Unit): Unit = { + benchmarks += Benchmark.Case(name, f, if (numIters == 0) defaultNumIters else numIters) } /** @@ -75,7 +75,7 @@ private[spark] class Benchmark( val results = benchmarks.map { c => println(" Running case: " + c.name) - Benchmark.measure(valuesPerIteration, iters, outputPerIteration)(c.fn) + Benchmark.measure(valuesPerIteration, c.numIters, outputPerIteration)(c.fn) } println @@ -83,12 +83,11 @@ private[spark] class Benchmark( // The results are going to be processor specific so it is useful to include that. println(Benchmark.getJVMOSInfo()) println(Benchmark.getProcessorName()) - printf("%-35s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)", + printf("%-40s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)", "Per Row(ns)", "Relative") - println("-----------------------------------------------------------------------------------" + - "--------") + println("-" * 96) results.zip(benchmarks).foreach { case (result, benchmark) => - printf("%-35s %16s %12s %13s %10s\n", + printf("%-40s %16s %12s %13s %10s\n", benchmark.name, "%5.0f / %4.0f" format (result.bestMs, result.avgMs), "%10.1f" format result.bestRate, @@ -128,7 +127,7 @@ private[spark] object Benchmark { } } - case class Case(name: String, fn: Timer => Unit) + case class Case(name: String, fn: Timer => Unit, numIters: Int) case class Result(avgMs: Double, bestRate: Double, bestMs: Double) /** diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala index 52428634e5..b03df1a94d 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala @@ -244,7 +244,7 @@ class RadixSortSuite extends SparkFunSuite with Logging { RadixSort.sortKeyPrefixArray(buf2, size, 0, 7, false, false) timer.stopTiming() } - benchmark.run + benchmark.run() /** Running benchmark: radix sort 25000000 -- cgit v1.2.3