[SPARK-15107][SQL] Allow varying # iterations by test case in Benchmark

## What changes were proposed in this pull request?

This patch changes our micro-benchmark util to allow setting different iteration numbers for different test cases. For some of our benchmarks, turning off whole-stage codegen can make the runtime 20X slower, making it very difficult to run them a large number of times without substantially shortening the input cardinality. With this change, I set the default number of iterations to 2 for whole-stage codegen off, and 5 for whole-stage codegen on. I also updated some results.

## How was this patch tested?

N/A - this is a test util.

Author: Reynold Xin <rxin@databricks.com>

Closes #12884 from rxin/SPARK-15107.
author: Reynold Xin <rxin@databricks.com> 2016-05-03 22:56:40 -0700
committer: Reynold Xin <rxin@databricks.com> 2016-05-03 22:56:40 -0700
commit: 695f0e9195209c75bfc62fc70bfc6d7d9f1047b3 (patch)
tree: 09a996aae9496a53b98ef5f0c6b9d0fabf3769dc /core
parent: 348c1389842c4d9a8807a41cf22caaa82f81d7ab (diff)
download: spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.tar.gz
spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.tar.bz2
spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.zip
2 files changed, 11 insertions, 12 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
index 1fc0ad7a4d..0c685b1918 100644
--- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala
+++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
@@ -38,7 +38,7 @@ import org.apache.commons.lang3.SystemUtils
 private[spark] class Benchmark(
     name: String,
     valuesPerIteration: Long,
-    iters: Int = 5,
+    defaultNumIters: Int = 5,
     outputPerIteration: Boolean = false) {
   val benchmarks = mutable.ArrayBuffer.empty[Benchmark.Case]
 
@@ -46,8 +46,8 @@ private[spark] class Benchmark(
    * Adds a case to run when run() is called. The given function will be run for several
    * iterations to collect timing statistics.
    */
-  def addCase(name: String)(f: Int => Unit): Unit = {
-    addTimerCase(name) { timer =>
+  def addCase(name: String, numIters: Int = 0)(f: Int => Unit): Unit = {
+    addTimerCase(name, numIters) { timer =>
       timer.startTiming()
       f(timer.iteration)
       timer.stopTiming()
@@ -59,8 +59,8 @@ private[spark] class Benchmark(
    * until timer.startTiming() is called within the given function. The corresponding
    * timer.stopTiming() method must be called before the function returns.
    */
-  def addTimerCase(name: String)(f: Benchmark.Timer => Unit): Unit = {
-    benchmarks += Benchmark.Case(name, f)
+  def addTimerCase(name: String, numIters: Int = 0)(f: Benchmark.Timer => Unit): Unit = {
+    benchmarks += Benchmark.Case(name, f, if (numIters == 0) defaultNumIters else numIters)
   }
 
   /**
@@ -75,7 +75,7 @@ private[spark] class Benchmark(
 
     val results = benchmarks.map { c =>
       println("  Running case: " + c.name)
-      Benchmark.measure(valuesPerIteration, iters, outputPerIteration)(c.fn)
+      Benchmark.measure(valuesPerIteration, c.numIters, outputPerIteration)(c.fn)
     }
     println
 
@@ -83,12 +83,11 @@ private[spark] class Benchmark(
     // The results are going to be processor specific so it is useful to include that.
     println(Benchmark.getJVMOSInfo())
     println(Benchmark.getProcessorName())
-    printf("%-35s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
+    printf("%-40s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
       "Per Row(ns)", "Relative")
-    println("-----------------------------------------------------------------------------------" +
-      "--------")
+    println("-" * 96)
     results.zip(benchmarks).foreach { case (result, benchmark) =>
-      printf("%-35s %16s %12s %13s %10s\n",
+      printf("%-40s %16s %12s %13s %10s\n",
         benchmark.name,
         "%5.0f / %4.0f" format (result.bestMs, result.avgMs),
         "%10.1f" format result.bestRate,
@@ -128,7 +127,7 @@ private[spark] object Benchmark {
     }
   }
 
-  case class Case(name: String, fn: Timer => Unit)
+  case class Case(name: String, fn: Timer => Unit, numIters: Int)
   case class Result(avgMs: Double, bestRate: Double, bestMs: Double)
 
   /**
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
index 52428634e5..b03df1a94d 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala
@@ -244,7 +244,7 @@ class RadixSortSuite extends SparkFunSuite with Logging {
       RadixSort.sortKeyPrefixArray(buf2, size, 0, 7, false, false)
       timer.stopTiming()
     }
-    benchmark.run
+    benchmark.run()
 
     /**
       Running benchmark: radix sort 25000000
author	Reynold Xin <rxin@databricks.com>	2016-05-03 22:56:40 -0700
committer	Reynold Xin <rxin@databricks.com>	2016-05-03 22:56:40 -0700
commit	695f0e9195209c75bfc62fc70bfc6d7d9f1047b3 (patch)
tree	09a996aae9496a53b98ef5f0c6b9d0fabf3769dc /core
parent	348c1389842c4d9a8807a41cf22caaa82f81d7ab (diff)
download	spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.tar.gz spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.tar.bz2 spark-695f0e9195209c75bfc62fc70bfc6d7d9f1047b3.zip