From 7fe4fe630a3fc9755ebd0325bb595d76381633e8 Mon Sep 17 00:00:00 2001
From: Wenchen Fan <wenchen@databricks.com>
Date: Tue, 9 Feb 2016 13:06:36 -0800
Subject: [SPARK-12888] [SQL] [FOLLOW-UP] benchmark the new hash expression

Adds the benchmark results as comments.

The codegen version is slower than the interpreted version for `simple` case becasue of 3 reasons:

1. codegen version use a more complex hash algorithm than interpreted version, i.e. `Murmur3_x86_32.hashInt` vs [simple multiplication and addition](https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala#L153).
2. codegen version will write the hash value to a row first and then read it out. I tried to create a `GenerateHasher` that can generate code to return hash value directly and got about 60% speed up for the `simple` case, does it worth?
3. the row in `simple` case only has one int field, so the runtime reflection may be removed because of branch prediction, which makes the interpreted version faster.

The `array` case is also slow for similar reasons, e.g. array elements are of same type, so interpreted version can probably get rid of runtime reflection by branch prediction.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #10917 from cloud-fan/hash-benchmark.
---
 .../scala/org/apache/spark/util/Benchmark.scala    |  4 +--
 .../scala/org/apache/spark/sql/HashBenchmark.scala | 40 ++++++++++++++++++----
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
index 1bf6f821e9..39d1829310 100644
--- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala
+++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala
@@ -35,7 +35,8 @@ import org.apache.commons.lang3.SystemUtils
  * If outputPerIteration is true, the timing for each run will be printed to stdout.
  */
 private[spark] class Benchmark(
-    name: String, valuesPerIteration: Long,
+    name: String,
+    valuesPerIteration: Long,
     iters: Int = 5,
     outputPerIteration: Boolean = false) {
   val benchmarks = mutable.ArrayBuffer.empty[Benchmark.Case]
@@ -61,7 +62,6 @@ private[spark] class Benchmark(
     println
 
     val firstBest = results.head.bestMs
-    val firstAvg = results.head.avgMs
     // The results are going to be processor specific so it is useful to include that.
     println(Benchmark.getProcessorName())
     printf("%-35s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)",
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
index 184f845b4d..5a929f211a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
@@ -29,9 +29,7 @@ import org.apache.spark.util.Benchmark
  */
 object HashBenchmark {
 
-  def test(name: String, schema: StructType, iters: Int): Unit = {
-    val numRows = 1024 * 8
-
+  def test(name: String, schema: StructType, numRows: Int, iters: Int): Unit = {
     val generator = RandomDataGenerator.forType(schema, nullable = false).get
     val encoder = RowEncoder(schema)
     val attrs = schema.toAttributes
@@ -70,7 +68,14 @@ object HashBenchmark {
 
   def main(args: Array[String]): Unit = {
     val simple = new StructType().add("i", IntegerType)
-    test("simple", simple, 1024)
+    /*
+    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
+    Hash For simple:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+    -------------------------------------------------------------------------------------------
+    interpreted version                       941 /  955        142.6           7.0       1.0X
+    codegen version                          1737 / 1775         77.3          12.9       0.5X
+     */
+    test("simple", simple, 1 << 13, 1 << 14)
 
     val normal = new StructType()
       .add("null", NullType)
@@ -87,18 +92,39 @@ object HashBenchmark {
       .add("binary", BinaryType)
       .add("date", DateType)
       .add("timestamp", TimestampType)
-    test("normal", normal, 128)
+    /*
+    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
+    Hash For normal:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+    -------------------------------------------------------------------------------------------
+    interpreted version                      2209 / 2271          0.9        1053.4       1.0X
+    codegen version                          1887 / 2018          1.1         899.9       1.2X
+     */
+    test("normal", normal, 1 << 10, 1 << 11)
 
     val arrayOfInt = ArrayType(IntegerType)
     val array = new StructType()
       .add("array", arrayOfInt)
       .add("arrayOfArray", ArrayType(arrayOfInt))
-    test("array", array, 64)
+    /*
+    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
+    Hash For array:                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+    -------------------------------------------------------------------------------------------
+    interpreted version                      1481 / 1529          0.1       11301.7       1.0X
+    codegen version                          2591 / 2636          0.1       19771.1       0.6X
+     */
+    test("array", array, 1 << 8, 1 << 9)
 
     val mapOfInt = MapType(IntegerType, IntegerType)
     val map = new StructType()
       .add("map", mapOfInt)
       .add("mapOfMap", MapType(IntegerType, mapOfInt))
-    test("map", map, 64)
+    /*
+    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
+    Hash For map:                       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+    -------------------------------------------------------------------------------------------
+    interpreted version                      1820 / 1861          0.0      444347.2       1.0X
+    codegen version                           205 /  223          0.0       49936.5       8.9X
+     */
+    test("map", map, 1 << 6, 1 << 6)
   }
 }
-- 
cgit v1.2.3