aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2016-01-16 00:38:17 -0800
committerDavies Liu <davies.liu@gmail.com>2016-01-16 00:38:17 -0800
commit2f7d0b68a29de9755fc9fafd9a52c048981ad880 (patch)
tree7cda8a9bd00cf2a889d6ad5875b3f8d23e5b2275 /sql
parent242efb7546084592a5e8122549a27117977303fb (diff)
downloadspark-2f7d0b68a29de9755fc9fafd9a52c048981ad880.tar.gz
spark-2f7d0b68a29de9755fc9fafd9a52c048981ad880.tar.bz2
spark-2f7d0b68a29de9755fc9fafd9a52c048981ad880.zip
[SPARK-12856] [SQL] speed up hashCode of unsafe array
Previously we iterated over the bytes to calculate hashCode, but now we have `Murmur3_x86_32.hashUnsafeBytes`, which doesn't require the bytes to be word aligned, so we should use that instead. A simple benchmark shows it's about 3x faster; benchmark code: https://gist.github.com/cloud-fan/fa77713ccebf0823b2ab#file-arrayhashbenchmark-scala Author: Wenchen Fan <wenchen@databricks.com> Closes #10784 from cloud-fan/array-hashcode.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java7
1 files changed, 2 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
index 3d80df2271..648625b2cc 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
@@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.types.*;
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;
+import org.apache.spark.unsafe.hash.Murmur3_x86_32;
import org.apache.spark.unsafe.types.CalendarInterval;
import org.apache.spark.unsafe.types.UTF8String;
@@ -299,11 +300,7 @@ public class UnsafeArrayData extends ArrayData {
@Override
public int hashCode() {
- int result = 37;
- for (int i = 0; i < sizeInBytes; i++) {
- result = 37 * result + Platform.getByte(baseObject, baseOffset + i);
- }
- return result;
+ return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42);
}
@Override