diff options
author | Davies Liu <davies@databricks.com> | 2016-02-03 17:07:27 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-02-03 17:07:27 -0800 |
commit | de0914522fc5b2658959f9e2272b4e3162b14978 (patch) | |
tree | f4adb6adf78ea97f1fdf8053d63b6da5211bb4e3 /core | |
parent | 915a75398ecbccdbf9a1e07333104c857ae1ce5e (diff) | |
download | spark-de0914522fc5b2658959f9e2272b4e3162b14978.tar.gz spark-de0914522fc5b2658959f9e2272b4e3162b14978.tar.bz2 spark-de0914522fc5b2658959f9e2272b4e3162b14978.zip |
[SPARK-13131] [SQL] Use best and average time in benchmark
Best time is more stable than average time; also added a column for nanoseconds per row (which could be used to estimate the contribution of each component in a query).
Having best time and average time together gives more information (we can see the variance).
Rate, time per row, and relative speed are all calculated using the best time.
The result looks like this:
```
Intel(R) Core(TM) i7-4558U CPU 2.80GHz
rang/filter/sum: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------
rang/filter/sum codegen=false 14332 / 16646 36.0 27.8 1.0X
rang/filter/sum codegen=true 845 / 940 620.0 1.6 17.0X
```
Author: Davies Liu <davies@databricks.com>
Closes #11018 from davies/gen_bench.
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Benchmark.scala | 38 |
1 file changed, 24 insertions, 14 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala index d484cec7ae..d1699f5c28 100644 --- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala @@ -18,6 +18,7 @@ package org.apache.spark.util import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer import org.apache.commons.lang3.SystemUtils @@ -59,17 +60,21 @@ private[spark] class Benchmark( } println - val firstRate = results.head.avgRate + val firstBest = results.head.bestMs + val firstAvg = results.head.avgMs // The results are going to be processor specific so it is useful to include that. println(Benchmark.getProcessorName()) - printf("%-30s %16s %16s %14s\n", name + ":", "Avg Time(ms)", "Avg Rate(M/s)", "Relative Rate") - println("-------------------------------------------------------------------------------") - results.zip(benchmarks).foreach { r => - printf("%-30s %16s %16s %14s\n", - r._2.name, - "%10.2f" format r._1.avgMs, - "%10.2f" format r._1.avgRate, - "%6.2f X" format (r._1.avgRate / firstRate)) + printf("%-35s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)", + "Per Row(ns)", "Relative") + println("-----------------------------------------------------------------------------------" + + "--------") + results.zip(benchmarks).foreach { case (result, benchmark) => + printf("%-35s %16s %12s %13s %10s\n", + benchmark.name, + "%5.0f / %4.0f" format (result.bestMs, result.avgMs), + "%10.1f" format result.bestRate, + "%6.1f" format (1000 / result.bestRate), + "%3.1fX" format (firstBest / result.bestMs)) } println // scalastyle:on @@ -78,7 +83,7 @@ private[spark] class Benchmark( private[spark] object Benchmark { case class Case(name: String, fn: Int => Unit) - case class Result(avgMs: Double, avgRate: Double) + case class Result(avgMs: Double, bestRate: Double, bestMs: Double) /** * This should return a user helpful 
processor information. Getting at this depends on the OS. @@ -99,22 +104,27 @@ private[spark] object Benchmark { * the rate of the function. */ def measure(num: Long, iters: Int, outputPerIteration: Boolean)(f: Int => Unit): Result = { - var totalTime = 0L + val runTimes = ArrayBuffer[Long]() for (i <- 0 until iters + 1) { val start = System.nanoTime() f(i) val end = System.nanoTime() - if (i != 0) totalTime += end - start + val runTime = end - start + if (i > 0) { + runTimes += runTime + } if (outputPerIteration) { // scalastyle:off - println(s"Iteration $i took ${(end - start) / 1000} microseconds") + println(s"Iteration $i took ${runTime / 1000} microseconds") // scalastyle:on } } - Result(totalTime.toDouble / 1000000 / iters, num * iters / (totalTime.toDouble / 1000)) + val best = runTimes.min + val avg = runTimes.sum / iters + Result(avg / 1000000, num / (best / 1000), best / 1000000) } } |