diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-11-23 17:21:37 -0800 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-11-23 17:21:37 -0800 |
commit | 7535d7fbcbe3c0c2515a2d17a806fa523917e398 (patch) | |
tree | e582e966567cab5ca2ff2176d2ee832f6db52b89 /core | |
parent | 51aa9d6e996895e15a6eb539b7049e6fe8a335e5 (diff) | |
download | spark-7535d7fbcbe3c0c2515a2d17a806fa523917e398.tar.gz spark-7535d7fbcbe3c0c2515a2d17a806fa523917e398.tar.bz2 spark-7535d7fbcbe3c0c2515a2d17a806fa523917e398.zip |
Fixes to AppendOnlyMap:
- Use Murmur Hash 3 finalization step to scramble the bits of HashCode
instead of the simpler version in java.util.HashMap; the latter one
had trouble with ranges of consecutive integers. Murmur Hash 3 is used
by fastutil.
- Use Object.equals() instead of Scala's == to compare keys, because the
latter does extra casts for numeric types (see the equals method in
https://github.com/scala/scala/blob/master/src/library/scala/runtime/BoxesRunTime.java)
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala index f60deafc6f..8542541fe6 100644 --- a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala @@ -56,7 +56,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k == curKey) { + if (k.eq(curKey) || k.equals(curKey)) { return data(2 * pos + 1).asInstanceOf[V] } else if (curKey.eq(null)) { return null.asInstanceOf[V] @@ -104,7 +104,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k == curKey) { + if (k.eq(curKey) || k.equals(curKey)) { val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V]) data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] return newValue @@ -167,12 +167,11 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi } /** - * Re-hash a value to deal better with hash functions that don't differ - * in the lower bits, similar to java.util.HashMap + * Re-hash a value to deal better with hash functions that don't differ in the lower bits. + * We use the Murmur Hash 3 finalization step that's also used in fastutil. */ private def rehash(h: Int): Int = { - val r = h ^ (h >>> 20) ^ (h >>> 12) - r ^ (r >>> 7) ^ (r >>> 4) + it.unimi.dsi.fastutil.HashCommon.murmurHash3(h) } /** @@ -190,7 +189,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi data(2 * pos) = key data(2 * pos + 1) = value.asInstanceOf[AnyRef] return true - } else if (curKey.eq(key) || curKey == key) { + } else if (curKey.eq(key) || curKey.equals(key)) { data(2 * pos + 1) = value.asInstanceOf[AnyRef] return false } else { |