aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-07-22 13:02:43 -0700
committerDavies Liu <davies.liu@gmail.com>2015-07-22 13:02:43 -0700
commite0b7ba59a1ace9b78a1ad6f3f07fe153db20b52c (patch)
tree539e14cbb49b30181461e7e01ca0056a5f1fe935 /unsafe
parent86f80e2b4759e574fe3eb91695f81b644db87242 (diff)
downloadspark-e0b7ba59a1ace9b78a1ad6f3f07fe153db20b52c.tar.gz
spark-e0b7ba59a1ace9b78a1ad6f3f07fe153db20b52c.tar.bz2
spark-e0b7ba59a1ace9b78a1ad6f3f07fe153db20b52c.zip
[SPARK-9024] Unsafe HashJoin/HashOuterJoin/HashSemiJoin
This PR introduce unsafe version (using UnsafeRow) of HashJoin, HashOuterJoin and HashSemiJoin, including the broadcast one and shuffle one (except FullOuterJoin, which is better to be implemented using SortMergeJoin). It use HashMap to store UnsafeRow right now, will change to use BytesToBytesMap for better performance (in another PR). Author: Davies Liu <davies@databricks.com> Closes #7480 from davies/unsafe_join and squashes the following commits: 6294b1e [Davies Liu] fix projection 10583f1 [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_join dede020 [Davies Liu] fix test 84c9807 [Davies Liu] address comments a05b4f6 [Davies Liu] support UnsafeRow in LeftSemiJoinBNL and BroadcastNestedLoopJoin 611d2ed [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_join 9481ae8 [Davies Liu] return UnsafeRow after join() ca2b40f [Davies Liu] revert unrelated change 68f5cd9 [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_join 0f4380d [Davies Liu] ada a comment 69e38f5 [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_join 1a40f02 [Davies Liu] refactor ab1690f [Davies Liu] address comments 60371f2 [Davies Liu] use UnsafeRow in SemiJoin a6c0b7d [Davies Liu] Merge branch 'master' of github.com:apache/spark into unsafe_join 184b852 [Davies Liu] fix style 6acbb11 [Davies Liu] fix tests 95d0762 [Davies Liu] remove println bea4a50 [Davies Liu] Unsafe HashJoin
Diffstat (limited to 'unsafe')
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java10
1 files changed, 7 insertions, 3 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java b/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
index 85cd02469a..61f483ced3 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
@@ -44,12 +44,16 @@ public final class Murmur3_x86_32 {
return fmix(h1, 4);
}
- public int hashUnsafeWords(Object baseObject, long baseOffset, int lengthInBytes) {
+ public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
+ return hashUnsafeWords(base, offset, lengthInBytes, seed);
+ }
+
+ public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
// This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
int h1 = seed;
- for (int offset = 0; offset < lengthInBytes; offset += 4) {
- int halfWord = PlatformDependent.UNSAFE.getInt(baseObject, baseOffset + offset);
+ for (int i = 0; i < lengthInBytes; i += 4) {
+ int halfWord = PlatformDependent.UNSAFE.getInt(base, offset + i);
int k1 = mixK1(halfWord);
h1 = mixH1(h1, k1);
}