Diffstat (limited to 'sql/catalyst/src/test')
-rw-r--r--  sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java        128
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala                              93
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala                    118
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala     3
4 files changed, 254 insertions(+), 88 deletions(-)
diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java
new file mode 100644
index 0000000000..67a5eb0c7f
--- /dev/null
+++ b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions;
+
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.types.UTF8String;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+public class HiveHasherSuite {
+ private final static HiveHasher hasher = new HiveHasher();
+
+ @Test
+ public void testKnownIntegerInputs() {
+ int[] inputs = {0, Integer.MIN_VALUE, Integer.MAX_VALUE, 593689054, -189366624};
+ for (int input : inputs) {
+ Assert.assertEquals(input, HiveHasher.hashInt(input));
+ }
+ }
+
+ @Test
+ public void testKnownLongInputs() {
+ Assert.assertEquals(0, HiveHasher.hashLong(0L));
+ Assert.assertEquals(41, HiveHasher.hashLong(-42L));
+ Assert.assertEquals(42, HiveHasher.hashLong(42L));
+ Assert.assertEquals(-2147483648, HiveHasher.hashLong(Long.MIN_VALUE));
+ Assert.assertEquals(-2147483648, HiveHasher.hashLong(Long.MAX_VALUE));
+ }
+
+ @Test
+ public void testKnownStringAndIntInputs() {
+ int[] inputs = {84, 19, 8};
+ int[] expected = {-823832826, -823835053, 111972242};
+
+ for (int i = 0; i < inputs.length; i++) {
+ UTF8String s = UTF8String.fromString("val_" + inputs[i]);
+ int hash = HiveHasher.hashUnsafeBytes(s.getBaseObject(), s.getBaseOffset(), s.numBytes());
+ Assert.assertEquals(expected[i], ((31 * inputs[i]) + hash));
+ }
+ }
+
+ @Test
+ public void randomizedStressTest() {
+ int size = 65536;
+ Random rand = new Random();
+
+ // A set used to track collision rate.
+ Set<Integer> hashcodes = new HashSet<>();
+ for (int i = 0; i < size; i++) {
+ int vint = rand.nextInt();
+ long lint = rand.nextLong();
+ Assert.assertEquals(HiveHasher.hashInt(vint), HiveHasher.hashInt(vint));
+ Assert.assertEquals(HiveHasher.hashLong(lint), HiveHasher.hashLong(lint));
+
+ hashcodes.add(HiveHasher.hashLong(lint));
+ }
+
+ // A very loose bound.
+ Assert.assertTrue(hashcodes.size() > size * 0.95);
+ }
+
+ @Test
+ public void randomizedStressTestBytes() {
+ int size = 65536;
+ Random rand = new Random();
+
+ // A set used to track collision rate.
+ Set<Integer> hashcodes = new HashSet<>();
+ for (int i = 0; i < size; i++) {
+ int byteArrSize = rand.nextInt(100) * 8;
+ byte[] bytes = new byte[byteArrSize];
+ rand.nextBytes(bytes);
+
+ Assert.assertEquals(
+ HiveHasher.hashUnsafeBytes(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
+ HiveHasher.hashUnsafeBytes(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
+
+ hashcodes.add(HiveHasher.hashUnsafeBytes(
+ bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
+ }
+
+ // A very loose bound.
+ Assert.assertTrue(hashcodes.size() > size * 0.95);
+ }
+
+ @Test
+ public void randomizedStressTestPaddedStrings() {
+ int size = 64000;
+ // A set used to track collision rate.
+ Set<Integer> hashcodes = new HashSet<>();
+ for (int i = 0; i < size; i++) {
+ int byteArrSize = 8;
+ byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
+ byte[] paddedBytes = new byte[byteArrSize];
+ System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
+
+ Assert.assertEquals(
+ HiveHasher.hashUnsafeBytes(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
+ HiveHasher.hashUnsafeBytes(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
+
+ hashcodes.add(HiveHasher.hashUnsafeBytes(
+ paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
+ }
+
+ // A very loose bound.
+ Assert.assertTrue(hashcodes.size() > size * 0.95);
+ }
+}
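
The known-answer tests above pin down the hashing scheme fairly tightly: ints hash to themselves, longs fold the high 32 bits into the low 32 bits, and byte sequences follow the classic h = 31 * h + b recurrence. The following is a minimal sketch reconstructed solely from the expected values in HiveHasherSuite; the class name HiveHasherSketch and the byte[]-based helper are illustrative and are not the actual HiveHasher API.

public class HiveHasherSketch {
  // Ints hash to themselves (testKnownIntegerInputs).
  static int hashInt(int input) {
    return input;
  }

  // Longs fold the high word into the low word; this reproduces
  // hashLong(-42L) == 41 and hashLong(Long.MIN_VALUE) == Integer.MIN_VALUE.
  static int hashLong(long input) {
    return (int) ((input >>> 32) ^ input);
  }

  // Byte sequences accumulate h = 31 * h + b over the raw bytes,
  // like String.hashCode (testKnownStringAndIntInputs).
  static int hashBytes(byte[] bytes) {
    int h = 0;
    for (byte b : bytes) {
      h = 31 * h + b;
    }
    return h;
  }

  public static void main(String[] args) {
    // Spot-check against the expected values used in the suite.
    if (hashInt(593689054) != 593689054) throw new AssertionError("hashInt");
    if (hashLong(-42L) != 41) throw new AssertionError("hashLong");
    if (hashLong(Long.MIN_VALUE) != Integer.MIN_VALUE) throw new AssertionError("hashLong min");
    int hash = hashBytes("val_84".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    // The suite asserts expected == 31 * key + hash, with key 84 and expected -823832826.
    if (31 * 84 + hash != -823832826) throw new AssertionError("hashBytes");
  }
}
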
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
index c6a1a2be0d..2d94b66a1e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala
@@ -42,8 +42,8 @@ object HashBenchmark {
val benchmark = new Benchmark("Hash For " + name, iters * numRows)
benchmark.addCase("interpreted version") { _: Int =>
+ var sum = 0
for (_ <- 0L until iters) {
- var sum = 0
var i = 0
while (i < numRows) {
sum += rows(i).hashCode()
@@ -54,8 +54,8 @@ object HashBenchmark {
val getHashCode = UnsafeProjection.create(new Murmur3Hash(attrs) :: Nil, attrs)
benchmark.addCase("codegen version") { _: Int =>
+ var sum = 0
for (_ <- 0L until iters) {
- var sum = 0
var i = 0
while (i < numRows) {
sum += getHashCode(rows(i)).getInt(0)
@@ -66,8 +66,8 @@ object HashBenchmark {
val getHashCode64b = UnsafeProjection.create(new XxHash64(attrs) :: Nil, attrs)
benchmark.addCase("codegen version 64-bit") { _: Int =>
+ var sum = 0
for (_ <- 0L until iters) {
- var sum = 0
var i = 0
while (i < numRows) {
sum += getHashCode64b(rows(i)).getInt(0)
@@ -76,30 +76,44 @@ object HashBenchmark {
}
}
+ val getHiveHashCode = UnsafeProjection.create(new HiveHash(attrs) :: Nil, attrs)
+ benchmark.addCase("codegen HiveHash version") { _: Int =>
+ var sum = 0
+ for (_ <- 0L until iters) {
+ var i = 0
+ while (i < numRows) {
+ sum += getHiveHashCode(rows(i)).getInt(0)
+ i += 1
+ }
+ }
+ }
+
benchmark.run()
}
def main(args: Array[String]): Unit = {
val singleInt = new StructType().add("i", IntegerType)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash For single ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- interpreted version 1006 / 1011 133.4 7.5 1.0X
- codegen version 1835 / 1839 73.1 13.7 0.5X
- codegen version 64-bit 1627 / 1628 82.5 12.1 0.6X
- */
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash For single ints: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ interpreted version 3262 / 3267 164.6 6.1 1.0X
+ codegen version 6448 / 6718 83.3 12.0 0.5X
+ codegen version 64-bit 6088 / 6154 88.2 11.3 0.5X
+ codegen HiveHash version 4732 / 4745 113.5 8.8 0.7X
+ */
test("single ints", singleInt, 1 << 15, 1 << 14)
val singleLong = new StructType().add("i", LongType)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash For single longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- interpreted version 1196 / 1209 112.2 8.9 1.0X
- codegen version 2178 / 2181 61.6 16.2 0.5X
- codegen version 64-bit 1752 / 1753 76.6 13.1 0.7X
- */
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash For single longs: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ interpreted version 3716 / 3726 144.5 6.9 1.0X
+ codegen version 7706 / 7732 69.7 14.4 0.5X
+ codegen version 64-bit 6370 / 6399 84.3 11.9 0.6X
+ codegen HiveHash version 4924 / 5026 109.0 9.2 0.8X
+ */
test("single longs", singleLong, 1 << 15, 1 << 14)
val normal = new StructType()
@@ -118,13 +132,14 @@ object HashBenchmark {
.add("date", DateType)
.add("timestamp", TimestampType)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash For normal: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- interpreted version 2713 / 2715 0.8 1293.5 1.0X
- codegen version 2015 / 2018 1.0 960.9 1.3X
- codegen version 64-bit 735 / 738 2.9 350.7 3.7X
- */
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash For normal: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ interpreted version 2985 / 3013 0.7 1423.4 1.0X
+ codegen version 2422 / 2434 0.9 1155.1 1.2X
+ codegen version 64-bit 856 / 920 2.5 408.0 3.5X
+ codegen HiveHash version 4501 / 4979 0.5 2146.4 0.7X
+ */
test("normal", normal, 1 << 10, 1 << 11)
val arrayOfInt = ArrayType(IntegerType)
@@ -132,13 +147,14 @@ object HashBenchmark {
.add("array", arrayOfInt)
.add("arrayOfArray", ArrayType(arrayOfInt))
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash For array: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- interpreted version 1498 / 1499 0.1 11432.1 1.0X
- codegen version 2642 / 2643 0.0 20158.4 0.6X
- codegen version 64-bit 2421 / 2424 0.1 18472.5 0.6X
- */
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash For array: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ interpreted version 3100 / 3555 0.0 23651.8 1.0X
+ codegen version 5779 / 5865 0.0 44088.4 0.5X
+ codegen version 64-bit 4738 / 4821 0.0 36151.7 0.7X
+ codegen HiveHash version 2200 / 2246 0.1 16785.9 1.4X
+ */
test("array", array, 1 << 8, 1 << 9)
val mapOfInt = MapType(IntegerType, IntegerType)
@@ -146,13 +162,14 @@ object HashBenchmark {
.add("map", mapOfInt)
.add("mapOfMap", MapType(IntegerType, mapOfInt))
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash For map: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- interpreted version 1612 / 1618 0.0 393553.4 1.0X
- codegen version 149 / 150 0.0 36381.2 10.8X
- codegen version 64-bit 144 / 145 0.0 35122.1 11.2X
- */
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash For map: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ interpreted version 0 / 0 48.1 20.8 1.0X
+ codegen version 257 / 275 0.0 62768.7 0.0X
+ codegen version 64-bit 226 / 240 0.0 55224.5 0.0X
+ codegen HiveHash version 89 / 96 0.0 21708.8 0.0X
+ */
test("map", map, 1 << 6, 1 << 6)
}
}
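
For reference, the derived columns in these tables are consistent with simple arithmetic over the best time and the total number of hash calls per case (iters * numRows, e.g. (1 << 15) * (1 << 14) = 2^29, roughly 536.9M calls for the single-ints and single-longs cases). A small sanity check of that arithmetic, using the single-ints "interpreted version" row above as an assumed example:

public class BenchmarkArithmetic {
  // Rate(M/s) = total calls / best time; Per Row(ns) = best time / total calls.
  static double rateMPerSec(long totalCalls, double bestMs) {
    return totalCalls / bestMs / 1000.0;
  }

  static double perRowNs(long totalCalls, double bestMs) {
    return bestMs * 1e6 / totalCalls;
  }

  public static void main(String[] args) {
    long totalCalls = 1L << 29;              // (1 << 15) rows * (1 << 14) iters
    double bestMs = 3262;                    // best time of the interpreted version above
    System.out.printf("%.1f M/s, %.1f ns/row%n",
        rateMPerSec(totalCalls, bestMs),     // ~164.6 M/s, matching the table
        perRowNs(totalCalls, bestMs));       // ~6.1 ns, matching the table
  }
}
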
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala
index 53f21a8442..2a753a0c84 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
import java.util.Random
-import org.apache.spark.sql.catalyst.expressions.XXH64
+import org.apache.spark.sql.catalyst.expressions.{HiveHasher, XXH64}
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.hash.Murmur3_x86_32
import org.apache.spark.util.Benchmark
@@ -38,8 +38,8 @@ object HashByteArrayBenchmark {
val benchmark = new Benchmark("Hash byte arrays with length " + length, iters * numArrays)
benchmark.addCase("Murmur3_x86_32") { _: Int =>
+ var sum = 0L
for (_ <- 0L until iters) {
- var sum = 0
var i = 0
while (i < numArrays) {
sum += Murmur3_x86_32.hashUnsafeBytes(arrays(i), Platform.BYTE_ARRAY_OFFSET, length, 42)
@@ -49,8 +49,8 @@ object HashByteArrayBenchmark {
}
benchmark.addCase("xxHash 64-bit") { _: Int =>
+ var sum = 0L
for (_ <- 0L until iters) {
- var sum = 0L
var i = 0
while (i < numArrays) {
sum += XXH64.hashUnsafeBytes(arrays(i), Platform.BYTE_ARRAY_OFFSET, length, 42)
@@ -59,90 +59,110 @@ object HashByteArrayBenchmark {
}
}
+ benchmark.addCase("HiveHasher") { _: Int =>
+ var sum = 0L
+ for (_ <- 0L until iters) {
+ var i = 0
+ while (i < numArrays) {
+ sum += HiveHasher.hashUnsafeBytes(arrays(i), Platform.BYTE_ARRAY_OFFSET, length)
+ i += 1
+ }
+ }
+ }
+
benchmark.run()
}
def main(args: Array[String]): Unit = {
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 8: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 11 / 12 185.1 5.4 1.0X
- xxHash 64-bit 17 / 18 120.0 8.3 0.6X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 8: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 12 / 16 174.3 5.7 1.0X
+ xxHash 64-bit 17 / 22 120.0 8.3 0.7X
+ HiveHasher 13 / 15 162.1 6.2 0.9X
*/
test(8, 42L, 1 << 10, 1 << 11)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 16: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 18 / 18 118.6 8.4 1.0X
- xxHash 64-bit 20 / 21 102.5 9.8 0.9X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 16: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 19 / 22 107.6 9.3 1.0X
+ xxHash 64-bit 20 / 24 104.6 9.6 1.0X
+ HiveHasher 24 / 28 87.0 11.5 0.8X
*/
test(16, 42L, 1 << 10, 1 << 11)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 24: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 24 / 24 86.6 11.5 1.0X
- xxHash 64-bit 23 / 23 93.2 10.7 1.1X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 24: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 28 / 32 74.8 13.4 1.0X
+ xxHash 64-bit 24 / 29 87.3 11.5 1.2X
+ HiveHasher 36 / 41 57.7 17.3 0.8X
*/
test(24, 42L, 1 << 10, 1 << 11)
// Add 31 to all arrays to create worse case alignment for xxHash.
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 31: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 38 / 39 54.7 18.3 1.0X
- xxHash 64-bit 33 / 33 64.4 15.5 1.2X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 31: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 41 / 45 51.1 19.6 1.0X
+ xxHash 64-bit 36 / 44 58.8 17.0 1.2X
+ HiveHasher 49 / 54 42.6 23.5 0.8X
*/
test(31, 42L, 1 << 10, 1 << 11)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 95: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 91 / 94 22.9 43.6 1.0X
- xxHash 64-bit 68 / 69 30.6 32.7 1.3X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 95: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 100 / 110 21.0 47.7 1.0X
+ xxHash 64-bit 74 / 78 28.2 35.5 1.3X
+ HiveHasher 189 / 196 11.1 90.3 0.5X
*/
test(64 + 31, 42L, 1 << 10, 1 << 11)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 287: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 268 / 268 7.8 127.6 1.0X
- xxHash 64-bit 108 / 109 19.4 51.6 2.5X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 287: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 299 / 311 7.0 142.4 1.0X
+ xxHash 64-bit 113 / 122 18.5 54.1 2.6X
+ HiveHasher 620 / 624 3.4 295.5 0.5X
*/
test(256 + 31, 42L, 1 << 10, 1 << 11)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 1055: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 942 / 945 2.2 449.4 1.0X
- xxHash 64-bit 276 / 276 7.6 131.4 3.4X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 1055: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 1068 / 1070 2.0 509.1 1.0X
+ xxHash 64-bit 306 / 315 6.9 145.9 3.5X
+ HiveHasher 2316 / 2369 0.9 1104.3 0.5X
*/
test(1024 + 31, 42L, 1 << 10, 1 << 11)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 2079: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 1839 / 1843 1.1 876.8 1.0X
- xxHash 64-bit 445 / 448 4.7 212.1 4.1X
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 2079: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 2252 / 2274 0.9 1074.1 1.0X
+ xxHash 64-bit 534 / 580 3.9 254.6 4.2X
+ HiveHasher 4739 / 4786 0.4 2259.8 0.5X
*/
test(2048 + 31, 42L, 1 << 10, 1 << 11)
/*
- Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz
- Hash byte arrays with length 8223: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- -------------------------------------------------------------------------------------------
- Murmur3_x86_32 7307 / 7310 0.3 3484.4 1.0X
- xxHash 64-bit 1487 / 1488 1.4 709.1 4.9X
- */
+ Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+ Hash byte arrays with length 8223: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+ ------------------------------------------------------------------------------------------------
+ Murmur3_x86_32 9249 / 9586 0.2 4410.5 1.0X
+ xxHash 64-bit 2897 / 3241 0.7 1381.6 3.2X
+ HiveHasher 19392 / 20211 0.1 9246.6 0.5X
+ */
test(8192 + 31, 42L, 1 << 10, 1 << 11)
}
}
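
Across these tables HiveHasher starts close to Murmur3_x86_32 on 8-byte inputs (about 0.9X) and falls to roughly 0.5X on the longer arrays, which is the scaling one would expect from a byte-at-a-time recurrence versus a hash that mixes four bytes per step. The sketch below only illustrates the two access patterns; the constants and mixing are simplified, so its outputs match neither HiveHasher nor Murmur3_x86_32.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class AccessPatternSketch {
  // HiveHasher-style: one multiply-add per byte.
  static int byteAtATime(byte[] data) {
    int h = 0;
    for (byte b : data) {
      h = 31 * h + b;
    }
    return h;
  }

  // Murmur3-style: consume four bytes per step, then fold in the tail.
  static int wordAtATime(byte[] data, int seed) {
    ByteBuffer buf = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
    int h = seed;
    while (buf.remaining() >= 4) {
      int k = buf.getInt() * 0xcc9e2d51;
      k = Integer.rotateLeft(k, 15) * 0x1b873593;
      h = Integer.rotateLeft(h ^ k, 13) * 5 + 0xe6546b64;
    }
    while (buf.hasRemaining()) {
      h = 31 * h + buf.get();
    }
    return h ^ data.length;
  }

  public static void main(String[] args) {
    byte[] data = new byte[8192 + 31];
    new java.util.Random(42).nextBytes(data);
    System.out.println(byteAtATime(data) + " " + wordAtATime(data, 42));
  }
}
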
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala
index 33916c0891..13ce588462 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala
@@ -145,7 +145,7 @@ class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val inputGenerator = RandomDataGenerator.forType(inputSchema, nullable = false).get
val encoder = RowEncoder(inputSchema)
val seed = scala.util.Random.nextInt()
- test(s"murmur3/xxHash64 hash: ${inputSchema.simpleString}") {
+ test(s"murmur3/xxHash64/hive hash: ${inputSchema.simpleString}") {
for (_ <- 1 to 10) {
val input = encoder.toRow(inputGenerator.apply().asInstanceOf[Row]).asInstanceOf[UnsafeRow]
val literals = input.toSeq(inputSchema).zip(inputSchema.map(_.dataType)).map {
@@ -154,6 +154,7 @@ class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
// Only test the interpreted version has same result with codegen version.
checkEvaluation(Murmur3Hash(literals, seed), Murmur3Hash(literals, seed).eval())
checkEvaluation(XxHash64(literals, seed), XxHash64(literals, seed).eval())
+ checkEvaluation(HiveHash(literals), HiveHash(literals).eval())
}
}
}