aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorMatei Zaharia <matei@eecs.berkeley.edu>2013-11-23 17:21:37 -0800
committerMatei Zaharia <matei@eecs.berkeley.edu>2013-11-23 17:21:37 -0800
commit7535d7fbcbe3c0c2515a2d17a806fa523917e398 (patch)
treee582e966567cab5ca2ff2176d2ee832f6db52b89 /core
parent51aa9d6e996895e15a6eb539b7049e6fe8a335e5 (diff)
downloadspark-7535d7fbcbe3c0c2515a2d17a806fa523917e398.tar.gz
spark-7535d7fbcbe3c0c2515a2d17a806fa523917e398.tar.bz2
spark-7535d7fbcbe3c0c2515a2d17a806fa523917e398.zip
Fixes to AppendOnlyMap:
- Use Murmur Hash 3 finalization step to scramble the bits of HashCode instead of the simpler version in java.util.HashMap; the latter one had trouble with ranges of consecutive integers. Murmur Hash 3 is used by fastutil. - Use Object.equals() instead of Scala's == to compare keys, because the latter does extra casts for numeric types (see the equals method in https://github.com/scala/scala/blob/master/src/library/scala/runtime/BoxesRunTime.java)
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala13
1 files changed, 6 insertions, 7 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala
index f60deafc6f..8542541fe6 100644
--- a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala
@@ -56,7 +56,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi
var i = 1
while (true) {
val curKey = data(2 * pos)
- if (k.eq(curKey) || k == curKey) {
+ if (k.eq(curKey) || k.equals(curKey)) {
return data(2 * pos + 1).asInstanceOf[V]
} else if (curKey.eq(null)) {
return null.asInstanceOf[V]
@@ -104,7 +104,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi
var i = 1
while (true) {
val curKey = data(2 * pos)
- if (k.eq(curKey) || k == curKey) {
+ if (k.eq(curKey) || k.equals(curKey)) {
val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V])
data(2 * pos + 1) = newValue.asInstanceOf[AnyRef]
return newValue
@@ -167,12 +167,11 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi
}
/**
- * Re-hash a value to deal better with hash functions that don't differ
- * in the lower bits, similar to java.util.HashMap
+ * Re-hash a value to deal better with hash functions that don't differ in the lower bits.
+ * We use the Murmur Hash 3 finalization step that's also used in fastutil.
*/
private def rehash(h: Int): Int = {
- val r = h ^ (h >>> 20) ^ (h >>> 12)
- r ^ (r >>> 7) ^ (r >>> 4)
+ it.unimi.dsi.fastutil.HashCommon.murmurHash3(h)
}
/**
@@ -190,7 +189,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi
data(2 * pos) = key
data(2 * pos + 1) = value.asInstanceOf[AnyRef]
return true
- } else if (curKey.eq(key) || curKey == key) {
+ } else if (curKey.eq(key) || curKey.equals(key)) {
data(2 * pos + 1) = value.asInstanceOf[AnyRef]
return false
} else {