about | summary | refs | log | tree | commit | diff
path: root/unsafe/src
diff options
context:
space:
mode:
author Wenchen Fan <wenchen@databricks.com> 2016-01-13 12:29:02 -0800
committer Reynold Xin <rxin@databricks.com> 2016-01-13 12:29:02 -0800
commit c2ea79f96acd076351b48162644ed1cff4c8e090 (patch)
tree 55ca22bdd84dac3cb225cd2b9bddaf0c11c93d19 /unsafe/src
parent e4e0b3f7b2945aae5ec7c3d68296010bbc5160cf (diff)
download spark-c2ea79f96acd076351b48162644ed1cff4c8e090.tar.gz
spark-c2ea79f96acd076351b48162644ed1cff4c8e090.tar.bz2
spark-c2ea79f96acd076351b48162644ed1cff4c8e090.zip
[SPARK-12642][SQL] improve the hash expression to be decoupled from unsafe row
https://issues.apache.org/jira/browse/SPARK-12642
Author: Wenchen Fan <wenchen@databricks.com>
Closes #10694 from cloud-fan/hash-expr.
Diffstat (limited to 'unsafe/src')
-rw-r--r-- unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java | 28
1 files changed, 27 insertions, 1 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java b/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
index 4276f25c21..5e7ee480ca 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
@@ -38,6 +38,10 @@ public final class Murmur3_x86_32 {
}
public int hashInt(int input) {
+ return hashInt(input, seed);
+ }
+
+ public static int hashInt(int input, int seed) {
int k1 = mixK1(input);
int h1 = mixH1(seed, k1);
@@ -51,16 +55,38 @@ public final class Murmur3_x86_32 {
public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
// This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
+ int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
+ return fmix(h1, lengthInBytes);
+ }
+
+ public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
+ assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
+ int lengthAligned = lengthInBytes - lengthInBytes % 4;
+ int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
+ for (int i = lengthAligned; i < lengthInBytes; i++) {
+ int halfWord = Platform.getByte(base, offset + i);
+ int k1 = mixK1(halfWord);
+ h1 = mixH1(h1, k1);
+ }
+ return fmix(h1, lengthInBytes);
+ }
+
+ private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
+ assert (lengthInBytes % 4 == 0);
int h1 = seed;
for (int i = 0; i < lengthInBytes; i += 4) {
int halfWord = Platform.getInt(base, offset + i);
int k1 = mixK1(halfWord);
h1 = mixH1(h1, k1);
}
- return fmix(h1, lengthInBytes);
+ return h1;
}
public int hashLong(long input) {
+ return hashLong(input, seed);
+ }
+
+ public static int hashLong(long input, int seed) {
int low = (int) input;
int high = (int) (input >>> 32);