authorDavies Liu <davies@databricks.com>2016-02-03 17:07:27 -0800
committerDavies Liu <davies.liu@gmail.com>2016-02-03 17:07:27 -0800
commitde0914522fc5b2658959f9e2272b4e3162b14978 (patch)
treef4adb6adf78ea97f1fdf8053d63b6da5211bb4e3 /core
parent915a75398ecbccdbf9a1e07333104c857ae1ce5e (diff)
downloadspark-de0914522fc5b2658959f9e2272b4e3162b14978.tar.gz
spark-de0914522fc5b2658959f9e2272b4e3162b14978.tar.bz2
spark-de0914522fc5b2658959f9e2272b4e3162b14978.zip
[SPARK-13131] [SQL] Use best and average time in benchmark
Best time is more stable than average time. This also adds a column for nanoseconds per row, which can be used to estimate the contribution of each component in a query. Best and average time are shown together for more information (so we can get a sense of the variance). Rate, time per row, and the relative column are all calculated from the best time.

The result looks like this:

```
Intel(R) Core(TM) i7-4558U CPU 2.80GHz
rang/filter/sum:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------
rang/filter/sum codegen=false          14332 / 16646         36.0          27.8       1.0X
rang/filter/sum codegen=true             845 /   940        620.0           1.6      17.0X
```

Author: Davies Liu <davies@databricks.com>

Closes #11018 from davies/gen_bench.
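As a quick check of the arithmetic behind the new columns, here is a minimal, self-contained Scala sketch (the row count, the run times, and names like `runTimesNs` are made up for illustration, not taken from the patch): the rate is rows divided by the best time in microseconds, which reads directly as millions of rows per second, and the per-row time is simply the inverse of that rate.

```scala
// Minimal sketch of the column arithmetic; all values here are hypothetical.
object BenchmarkColumnsSketch {
  def main(args: Array[String]): Unit = {
    val numRows = 500L * 1000 * 1000                   // rows processed per iteration (made up)
    val runTimesNs = Seq(14332e6, 15800e6, 16646e6)    // wall time of each timed iteration, in ns

    val bestNs = runTimesNs.min                        // best time: more stable across runs
    val avgNs  = runTimesNs.sum / runTimesNs.size      // average over the timed iterations

    val rateMPerSec = numRows / (bestNs / 1000)        // rows per microsecond == millions of rows/s
    val nsPerRow    = 1000 / rateMPerSec               // inverse of the rate, in ns per row

    printf("Best/Avg Time(ms) %5.0f / %5.0f  Rate(M/s) %10.1f  Per Row(ns) %6.1f\n",
      bestNs / 1e6, avgNs / 1e6, rateMPerSec, nsPerRow)
  }
}
```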
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/util/Benchmark.scala | 38
1 file changed, 24 insertions, 14 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
index d484cec7ae..d1699f5c28 100644
--- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala
+++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
@@ -18,6 +18,7 @@
package org.apache.spark.util
import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
import org.apache.commons.lang3.SystemUtils
@@ -59,17 +60,21 @@ private[spark] class Benchmark(
}
println
- val firstRate = results.head.avgRate
+ val firstBest = results.head.bestMs
+ val firstAvg = results.head.avgMs
// The results are going to be processor specific so it is useful to include that.
println(Benchmark.getProcessorName())
- printf("%-30s %16s %16s %14s\n", name + ":", "Avg Time(ms)", "Avg Rate(M/s)", "Relative Rate")
- println("-------------------------------------------------------------------------------")
- results.zip(benchmarks).foreach { r =>
- printf("%-30s %16s %16s %14s\n",
- r._2.name,
- "%10.2f" format r._1.avgMs,
- "%10.2f" format r._1.avgRate,
- "%6.2f X" format (r._1.avgRate / firstRate))
+ printf("%-35s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
+ "Per Row(ns)", "Relative")
+ println("-----------------------------------------------------------------------------------" +
+ "--------")
+ results.zip(benchmarks).foreach { case (result, benchmark) =>
+ printf("%-35s %16s %12s %13s %10s\n",
+ benchmark.name,
+ "%5.0f / %4.0f" format (result.bestMs, result.avgMs),
+ "%10.1f" format result.bestRate,
+ "%6.1f" format (1000 / result.bestRate),
+ "%3.1fX" format (firstBest / result.bestMs))
}
println
// scalastyle:on
@@ -78,7 +83,7 @@ private[spark] class Benchmark(
private[spark] object Benchmark {
case class Case(name: String, fn: Int => Unit)
- case class Result(avgMs: Double, avgRate: Double)
+ case class Result(avgMs: Double, bestRate: Double, bestMs: Double)
/**
* This should return a user helpful processor information. Getting at this depends on the OS.
@@ -99,22 +104,27 @@ private[spark] object Benchmark {
* the rate of the function.
*/
def measure(num: Long, iters: Int, outputPerIteration: Boolean)(f: Int => Unit): Result = {
- var totalTime = 0L
+ val runTimes = ArrayBuffer[Long]()
for (i <- 0 until iters + 1) {
val start = System.nanoTime()
f(i)
val end = System.nanoTime()
- if (i != 0) totalTime += end - start
+ val runTime = end - start
+ if (i > 0) {
+ runTimes += runTime
+ }
if (outputPerIteration) {
// scalastyle:off
- println(s"Iteration $i took ${(end - start) / 1000} microseconds")
+ println(s"Iteration $i took ${runTime / 1000} microseconds")
// scalastyle:on
}
}
- Result(totalTime.toDouble / 1000000 / iters, num * iters / (totalTime.toDouble / 1000))
+ val best = runTimes.min
+ val avg = runTimes.sum / iters
+ Result(avg / 1000000, num / (best / 1000), best / 1000000)
}
}
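For context, a hypothetical driver for this class might look like the sketch below. It assumes Benchmark's addCase and run methods, which live in the full Benchmark.scala rather than in the hunks shown above, and it sits in the org.apache.spark.util package because the class is declared private[spark]; the benchmark names and case bodies are invented for illustration.

```scala
package org.apache.spark.util

// Hypothetical driver: the class is private[spark], so this sketch lives in the
// same package. addCase/run are part of the full Benchmark.scala, not of the
// hunks above; everything else here is invented for illustration.
object SumBenchmarkSketch {
  def main(args: Array[String]): Unit = {
    val numRows = 10L * 1000 * 1000
    val benchmark = new Benchmark("sum", numRows)

    benchmark.addCase("while loop") { _ =>
      var i = 0L
      var sum = 0L
      while (i < numRows) { sum += i; i += 1 }
    }

    benchmark.addCase("range.sum") { _ =>
      (0L until numRows).sum
    }

    // Prints the Best/Avg Time(ms), Rate(M/s), Per Row(ns) and Relative columns
    // introduced by this patch, with rate and per-row time based on the best run.
    benchmark.run()
  }
}
```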