summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2011-09-08 05:43:32 +0000
committerPaul Phillips <paulp@improving.org>2011-09-08 05:43:32 +0000
commit52c1d019d63d2fffadc8f3f159d654187ac61ece (patch)
tree6958700cb96b574b0894b3792f76be68d2a7161d /src
parentc37e8f45cf86d0a3f6cb084e85f9751f5e906bf2 (diff)
downloadscala-52c1d019d63d2fffadc8f3f159d654187ac61ece.tar.gz
scala-52c1d019d63d2fffadc8f3f159d654187ac61ece.tar.bz2
scala-52c1d019d63d2fffadc8f3f159d654187ac61ece.zip
Refinement of murmurhash implementation.
Integrates recent speed improvements to algorithm. Contributed by Ruediger Keller, no review.
Diffstat (limited to 'src')
-rw-r--r--src/compiler/scala/tools/nsc/util/BitSet.scala21
-rw-r--r--src/library/scala/collection/GenMapLike.scala2
-rw-r--r--src/library/scala/collection/GenSeqLike.scala11
-rw-r--r--src/library/scala/collection/GenSetLike.scala2
-rw-r--r--src/library/scala/runtime/ScalaRunTime.scala25
-rw-r--r--src/library/scala/util/MurmurHash.scala3
-rw-r--r--src/library/scala/util/MurmurHash3.scala178
7 files changed, 199 insertions, 43 deletions
diff --git a/src/compiler/scala/tools/nsc/util/BitSet.scala b/src/compiler/scala/tools/nsc/util/BitSet.scala
index a8d54c2c84..de0c6fd62a 100644
--- a/src/compiler/scala/tools/nsc/util/BitSet.scala
+++ b/src/compiler/scala/tools/nsc/util/BitSet.scala
@@ -72,20 +72,15 @@ abstract class BitSet {
}
override def hashCode: Int = {
- import scala.util.MurmurHash._
- var h = startHash(hashSeed)
- var c = startMagicA
- var k = startMagicB
- for (idx <- 0 until nwords) {
- val w = word(idx)
- h = extendHash(h, (w>>>32).toInt, c, k)
- c = nextMagicA(c)
- k = nextMagicB(k)
- h = extendHash(h, w.toInt, c, k)
- c = nextMagicA(c)
- k = nextMagicB(k)
+ import scala.util.MurmurHash3._
+ var h = hashSeed
+ var i = 0; while(i < nwords) {
+ val w = word(i)
+ h = mix(h, (w >>> 32).toInt)
+ h = mix(h, w.toInt)
+ i += 1
}
- finalizeHash(h)
+ finalizeHash(h, nwords)
}
def addString(sb: StringBuilder, start: String, sep: String, end: String) {
diff --git a/src/library/scala/collection/GenMapLike.scala b/src/library/scala/collection/GenMapLike.scala
index 6060087d54..f4a3903c13 100644
--- a/src/library/scala/collection/GenMapLike.scala
+++ b/src/library/scala/collection/GenMapLike.scala
@@ -32,7 +32,7 @@ trait GenMapLike[A, +B, +Repr] extends GenIterableLike[(A, B), Repr] with Equals
// This hash code must be symmetric in the contents but ought not
// collide trivially.
- override def hashCode() = util.MurmurHash.symmetricHash(seq, Map.hashSeed)
+ override def hashCode() = util.MurmurHash3.symmetricHash(seq, Map.hashSeed)
/** Returns the value associated with a key, or a default value if the key is not contained in the map.
* @param key the key.
diff --git a/src/library/scala/collection/GenSeqLike.scala b/src/library/scala/collection/GenSeqLike.scala
index 57e5105e23..2d50cb18b3 100644
--- a/src/library/scala/collection/GenSeqLike.scala
+++ b/src/library/scala/collection/GenSeqLike.scala
@@ -440,9 +440,14 @@ trait GenSeqLike[+A, +Repr] extends GenIterableLike[A, Repr] with Equals with Pa
* elements of the $coll.
*/
override def hashCode() = {
- val h = new util.MurmurHash[A](Seq.hashSeed)
- seq.foreach(h)
- h.hash
+ import util.MurmurHash3._
+ var n = 0
+ var h = Seq.hashSeed
+ seq foreach {
+ x => h = mix(h, x.##)
+ n += 1
+ }
+ finalizeHash(h, n)
}
/** The equals method for arbitrary sequences. Compares this sequence to
diff --git a/src/library/scala/collection/GenSetLike.scala b/src/library/scala/collection/GenSetLike.scala
index 2fc94c2e87..adbb043ecd 100644
--- a/src/library/scala/collection/GenSetLike.scala
+++ b/src/library/scala/collection/GenSetLike.scala
@@ -143,6 +143,6 @@ extends GenIterableLike[A, Repr]
// Calling map on a set drops duplicates: any hashcode collisions would
// then be dropped before they can be added.
// Hash should be symmetric in set entries, but without trivial collisions.
- override def hashCode() = util.MurmurHash.symmetricHash(seq, Set.hashSeed)
+ override def hashCode() = util.MurmurHash3.symmetricHash(seq, Set.hashSeed)
}
diff --git a/src/library/scala/runtime/ScalaRunTime.scala b/src/library/scala/runtime/ScalaRunTime.scala
index 28a093ce42..9162ae0fc0 100644
--- a/src/library/scala/runtime/ScalaRunTime.scala
+++ b/src/library/scala/runtime/ScalaRunTime.scala
@@ -174,30 +174,7 @@ object ScalaRunTime {
def _toString(x: Product): String =
x.productIterator.mkString(x.productPrefix + "(", ",", ")")
- def _hashCode(x: Product): Int = {
- import scala.util.MurmurHash._
- val arr = x.productArity
- // Case objects have the hashCode inlined directly into the
- // synthetic hashCode method, but this method should still give
- // a correct result if passed a case object.
- if (arr == 0) {
- x.productPrefix.hashCode
- }
- else {
- var h = startHash(arr)
- var c = startMagicA
- var k = startMagicB
- var i = 0
- while (i < arr) {
- val elem = x.productElement(i)
- h = extendHash(h, elem.##, c, k)
- c = nextMagicA(c)
- k = nextMagicB(k)
- i += 1
- }
- finalizeHash(h)
- }
- }
+ def _hashCode(x: Product): Int = scala.util.MurmurHash3.productHash(x)
/** A helper for case classes. */
def typedProductIterator[T](x: Product): Iterator[T] = {
diff --git a/src/library/scala/util/MurmurHash.scala b/src/library/scala/util/MurmurHash.scala
index 71db2536be..029fe095af 100644
--- a/src/library/scala/util/MurmurHash.scala
+++ b/src/library/scala/util/MurmurHash.scala
@@ -27,6 +27,7 @@ import scala.collection.Iterator
* or can take individual hash values with append. Its own hash code is
* set equal to the hash code of whatever it is hashing.
*/
+@deprecated("Use the object MurmurHash3 instead.", "2.10.0")
class MurmurHash[@specialized(Int,Long,Float,Double) T](seed: Int) extends (T => Unit) {
import MurmurHash._
@@ -79,7 +80,7 @@ class MurmurHash[@specialized(Int,Long,Float,Double) T](seed: Int) extends (T =>
* incorporate a new integer) to update the values. Only one method
* needs to be called to finalize the hash.
*/
-
+@deprecated("Use the object MurmurHash3 instead.", "2.10.0")
object MurmurHash {
// Magic values used for MurmurHash's 32 bit hash.
// Don't change these without consulting a hashing expert!
diff --git a/src/library/scala/util/MurmurHash3.scala b/src/library/scala/util/MurmurHash3.scala
new file mode 100644
index 0000000000..0fd8b2e123
--- /dev/null
+++ b/src/library/scala/util/MurmurHash3.scala
@@ -0,0 +1,178 @@
+package scala.util
+
+import java.lang.Integer.{ rotateLeft => rotl }
+
+
+/**
+ * An implementation of Austin Appleby's MurmurHash 3 algorithm
+ * (MurmurHash3_x86_32).
+ *
+ * An algorithm designed to generate well-distributed non-cryptographic
+ * hashes. It is designed to hash data in 32 bit chunks (ints).
+ *
+ * The mix method needs to be called at each step to update the intermediate
+ * hash value. For the last chunk to incorporate into the hash mixLast may
+ * be used instead, which is slightly faster. Finally finalizeHash needs to
+ * be called to compute the final hash value.
+ *
+ * This is based on the earlier MurmurHash3 code by Rex Kerr, but the
+ * MurmurHash3 algorithm was since changed by its creator Austin Appleby
+ * to remedy some weaknesses and improve performance. This represents the
+ * latest and supposedly final version of the algortihm (revision 136).
+ *
+ * @see http://code.google.com/p/smhasher
+ */
+object MurmurHash3 {
+
+ // Some arbitrary values used as hash seeds
+ final val arraySeed = 0x3c074a61
+ final val stringSeed = 0xf7ca7fd2
+ final val productSeed = 0xcafebabe
+ final val symmetricSeed = 0xb592f7ae
+ final val traversableSeed = 0xe73a8b15
+
+ /** Mix in a block of data into an intermediate hash value. */
+ final def mix(hash: Int, data: Int): Int = {
+ var h = mixLast(hash, data)
+ h = rotl(h, 13)
+ h * 5 + 0xe6546b64
+ }
+
+ /** May optionally be used as the last mixing step. Is a little bit faster than mix,
+ * as it does no further mixing of the resulting hash. For the last element this is not
+ * necessary as the hash is thoroughly mixed during finalization anyway. */
+ final def mixLast(hash: Int, data: Int): Int = {
+ var k = data
+
+ k *= 0xcc9e2d51
+ k = rotl(k, 15)
+ k *= 0x1b873593
+
+ hash ^ k
+ }
+
+ /** Finalize a hash to incorporate the length and make sure all bits avalanche. */
+ final def finalizeHash(hash: Int, length: Int): Int = avalanche(hash ^ length)
+
+ /** Force all bits of the hash to avalanche. Used for finalizing the hash. */
+ private final def avalanche(hash: Int): Int = {
+ var h = hash
+
+ h ^= h >>> 16
+ h *= 0x85ebca6b
+ h ^= h >>> 13
+ h *= 0xc2b2ae35
+ h ^= h >>> 16
+
+ h
+ }
+
+ /** Compute the hash of a product */
+ final def productHash(x: Product, seed: Int = productSeed): Int = {
+ val arr = x.productArity
+ // Case objects have the hashCode inlined directly into the
+ // synthetic hashCode method, but this method should still give
+ // a correct result if passed a case object.
+ if (arr == 0) {
+ x.productPrefix.hashCode
+ }
+ else {
+ var h = seed
+ var i = 0
+ while (i < arr) {
+ h = mix(h, x.productElement(i).##)
+ i += 1
+ }
+ finalizeHash(h, arr)
+ }
+ }
+
+ /** Compute the hash of a string */
+ final def stringHash(str: String, seed: Int = stringSeed): Int = {
+ var h = seed
+ var i = 0
+ while (i + 1 < str.length) {
+ val data = (str.charAt(i) << 16) + str.charAt(i + 1)
+ h = mix(h, data)
+ i += 2
+ }
+ if (i < str.length) h = mixLast(h, str.charAt(i))
+ finalizeHash(h, str.length)
+ }
+
+ /** Compute a hash that is symmetric in its arguments - that is a hash
+ * where the order of appearance of elements does not matter.
+ * This is useful for hashing sets, for example. */
+ final def symmetricHash[T](xs: collection.GenTraversableOnce[T], seed: Int = symmetricSeed): Int = {
+ var a, b, n = 0
+ var c = 1
+ xs.seq.foreach { x =>
+ val h = x.##
+ a += h
+ b ^= h
+ if (h != 0) c *= h
+ n += 1
+ }
+
+ var h = seed
+ h = mix(h, a)
+ h = mix(h, b)
+ h = mixLast(h, c)
+ finalizeHash(h, n)
+ }
+
+ /** Compute a hash for a traversable (once). */
+ final def traversableHash[T](xs: collection.GenTraversableOnce[T], seed: Int = traversableSeed): Int = {
+ var n = 0
+ var h = seed
+ xs.seq.foreach { x =>
+ h = mix(h, x.##)
+ n += 1
+ }
+ finalizeHash(h, n)
+ }
+
+ /** Compute the hash of an array */
+ final def arrayHash[@specialized T](a: Array[T], seed: Int = arraySeed): Int = {
+ var h = seed
+ var i = 0
+ while (i < a.length) {
+ h = mix(h, a(i).##)
+ i += 1
+ }
+ finalizeHash(h, a.length)
+ }
+
+ /** Compute the hash of a byte array. Faster than arrayHash, because
+ * it hashes 4 bytes at once. */
+ final def bytesHash(data: Array[Byte], seed: Int = arraySeed): Int = {
+ var len = data.length
+ var h = seed
+
+ // Body
+ var i = 0
+ while(len >= 4) {
+ var k = data(i + 0) & 0xFF
+ k |= (data(i + 1) & 0xFF) << 8
+ k |= (data(i + 2) & 0xFF) << 16
+ k |= (data(i + 3) & 0xFF) << 24
+
+ h = mix(h, k)
+
+ i += 4
+ len -= 4
+ }
+
+ // Tail
+ var k = 0
+ if(len == 3) k ^= (data(i + 2) & 0xFF) << 16
+ if(len >= 2) k ^= (data(i + 1) & 0xFF) << 8
+ if(len >= 1) {
+ k ^= (data(i + 0) & 0xFF)
+ h = mixLast(h, k)
+ }
+
+ // Finalization
+ finalizeHash(h, data.length)
+ }
+}