author     Reynold Xin <rxin@databricks.com>    2016-05-03 22:56:40 -0700
committer  Reynold Xin <rxin@databricks.com>    2016-05-03 22:56:40 -0700
commit     695f0e9195209c75bfc62fc70bfc6d7d9f1047b3 (patch)
tree       09a996aae9496a53b98ef5f0c6b9d0fabf3769dc /core
parent     348c1389842c4d9a8807a41cf22caaa82f81d7ab (diff)
download   spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.tar.gz
           spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.tar.bz2
           spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.zip
[SPARK-15107][SQL] Allow varying # iterations by test case in Benchmark
## What changes were proposed in this pull request?

This patch changes our micro-benchmark util to allow setting different iteration numbers for different test cases. For some of our benchmarks, turning off whole-stage codegen can make the runtime 20X slower, making it very difficult to run a large number of times without substantially shortening the input cardinality.

With this change, I set the default num iterations to 2 for whole stage codegen off, and 5 for whole stage codegen on. I also updated some results.

## How was this patch tested?

N/A - this is a test util.

Author: Reynold Xin <rxin@databricks.com>

Closes #12884 from rxin/SPARK-15107.
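For illustration only, here is a minimal sketch of how a caller could combine the new default iteration count with a per-case override after this patch. The benchmark name, case names, and workloads below are hypothetical, and because `Benchmark` is a `private[spark]` utility the snippet assumes it runs from inside the Spark source tree:

```scala
import org.apache.spark.util.Benchmark

// Hypothetical example of per-case iteration counts (not part of this patch).
object BenchmarkIterationsExample {
  def main(args: Array[String]): Unit = {
    val N = 10L * 1000 * 1000

    // Cases that do not pass numIters fall back to defaultNumIters (5 here).
    val benchmark = new Benchmark("sum of longs", N, defaultNumIters = 5)

    // Fast case: keep the default of 5 iterations.
    benchmark.addCase("whole stage codegen on (hypothetical)") { _ =>
      var i = 0L
      var sum = 0L
      while (i < N) { sum += i; i += 1 }
    }

    // Slow case: override to 2 iterations, mirroring the choice described above
    // for whole stage codegen off.
    benchmark.addCase("whole stage codegen off (hypothetical)", numIters = 2) { _ =>
      var i = 0L
      var sum = 0L
      while (i < N) { sum += i % 3; i += 1 }
    }

    benchmark.run()
  }
}
```

Because the override is resolved per case (a `numIters` of 0 falls back to `defaultNumIters`), slow configurations can be sampled fewer times without shrinking the input cardinality used by the fast ones.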
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/util/Benchmark.scala                              21
-rw-r--r--  core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala   2
2 files changed, 11 insertions, 12 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
index 1fc0ad7a4d..0c685b1918 100644
--- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala
+++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
@@ -38,7 +38,7 @@ import org.apache.commons.lang3.SystemUtils
 private[spark] class Benchmark(
     name: String,
     valuesPerIteration: Long,
-    iters: Int = 5,
+    defaultNumIters: Int = 5,
     outputPerIteration: Boolean = false) {
   val benchmarks = mutable.ArrayBuffer.empty[Benchmark.Case]
@@ -46,8 +46,8 @@ private[spark] class Benchmark(
    * Adds a case to run when run() is called. The given function will be run for several
    * iterations to collect timing statistics.
    */
-  def addCase(name: String)(f: Int => Unit): Unit = {
-    addTimerCase(name) { timer =>
+  def addCase(name: String, numIters: Int = 0)(f: Int => Unit): Unit = {
+    addTimerCase(name, numIters) { timer =>
       timer.startTiming()
       f(timer.iteration)
       timer.stopTiming()
@@ -59,8 +59,8 @@ private[spark] class Benchmark(
    * until timer.startTiming() is called within the given function. The corresponding
    * timer.stopTiming() method must be called before the function returns.
    */
-  def addTimerCase(name: String)(f: Benchmark.Timer => Unit): Unit = {
-    benchmarks += Benchmark.Case(name, f)
+  def addTimerCase(name: String, numIters: Int = 0)(f: Benchmark.Timer => Unit): Unit = {
+    benchmarks += Benchmark.Case(name, f, if (numIters == 0) defaultNumIters else numIters)
   }
   /**
@@ -75,7 +75,7 @@ private[spark] class Benchmark(
     val results = benchmarks.map { c =>
       println("  Running case: " + c.name)
-      Benchmark.measure(valuesPerIteration, iters, outputPerIteration)(c.fn)
+      Benchmark.measure(valuesPerIteration, c.numIters, outputPerIteration)(c.fn)
     }
     println
@@ -83,12 +83,11 @@ private[spark] class Benchmark(
     // The results are going to be processor specific so it is useful to include that.
     println(Benchmark.getJVMOSInfo())
     println(Benchmark.getProcessorName())
-    printf("%-35s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
+    printf("%-40s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
       "Per Row(ns)", "Relative")
-    println("-----------------------------------------------------------------------------------" +
-      "--------")
+    println("-" * 96)
     results.zip(benchmarks).foreach { case (result, benchmark) =>
-      printf("%-35s %16s %12s %13s %10s\n",
+      printf("%-40s %16s %12s %13s %10s\n",
         benchmark.name,
         "%5.0f / %4.0f" format (result.bestMs, result.avgMs),
         "%10.1f" format result.bestRate,
@@ -128,7 +127,7 @@ private[spark] object Benchmark {
     }
   }
-  case class Case(name: String, fn: Timer => Unit)
+  case class Case(name: String, fn: Timer => Unit, numIters: Int)
   case class Result(avgMs: Double, bestRate: Double, bestMs: Double)
   /**
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
index 52428634e5..b03df1a94d 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
@@ -244,7 +244,7 @@ class RadixSortSuite extends SparkFunSuite with Logging {
       RadixSort.sortKeyPrefixArray(buf2, size, 0, 7, false, false)
       timer.stopTiming()
     }
-    benchmark.run
+    benchmark.run()
/**
Running benchmark: radix sort 25000000