summaryrefslogtreecommitdiff
path: root/src/library/scala/util/MurmurHash3.scala
diff options
context:
space:
mode:
Diffstat (limited to 'src/library/scala/util/MurmurHash3.scala')
-rw-r--r--src/library/scala/util/MurmurHash3.scala105
1 files changed, 81 insertions, 24 deletions
diff --git a/src/library/scala/util/MurmurHash3.scala b/src/library/scala/util/MurmurHash3.scala
index 0fd8b2e123..33d9d2f0e5 100644
--- a/src/library/scala/util/MurmurHash3.scala
+++ b/src/library/scala/util/MurmurHash3.scala
@@ -2,7 +2,6 @@ package scala.util
import java.lang.Integer.{ rotateLeft => rotl }
-
/**
* An implementation of Austin Appleby's MurmurHash 3 algorithm
* (MurmurHash3_x86_32).
@@ -22,15 +21,7 @@ import java.lang.Integer.{ rotateLeft => rotl }
*
* @see http://code.google.com/p/smhasher
*/
-object MurmurHash3 {
-
- // Some arbitrary values used as hash seeds
- final val arraySeed = 0x3c074a61
- final val stringSeed = 0xf7ca7fd2
- final val productSeed = 0xcafebabe
- final val symmetricSeed = 0xb592f7ae
- final val traversableSeed = 0xe73a8b15
-
+class MurmurHash3 {
/** Mix in a block of data into an intermediate hash value. */
final def mix(hash: Int, data: Int): Int = {
var h = mixLast(hash, data)
@@ -68,7 +59,7 @@ object MurmurHash3 {
}
/** Compute the hash of a product */
- final def productHash(x: Product, seed: Int = productSeed): Int = {
+ final def productHash(x: Product, seed: Int): Int = {
val arr = x.productArity
// Case objects have the hashCode inlined directly into the
// synthetic hashCode method, but this method should still give
@@ -88,7 +79,7 @@ object MurmurHash3 {
}
/** Compute the hash of a string */
- final def stringHash(str: String, seed: Int = stringSeed): Int = {
+ final def stringHash(str: String, seed: Int): Int = {
var h = seed
var i = 0
while (i + 1 < str.length) {
@@ -102,38 +93,39 @@ object MurmurHash3 {
/** Compute a hash that is symmetric in its arguments - that is a hash
* where the order of appearance of elements does not matter.
- * This is useful for hashing sets, for example. */
- final def symmetricHash[T](xs: collection.GenTraversableOnce[T], seed: Int = symmetricSeed): Int = {
+ * This is useful for hashing sets, for example.
+ */
+ final def unorderedHash(xs: TraversableOnce[Any], seed: Int): Int = {
var a, b, n = 0
var c = 1
- xs.seq.foreach { x =>
+ xs foreach { x =>
val h = x.##
a += h
b ^= h
if (h != 0) c *= h
n += 1
}
-
var h = seed
h = mix(h, a)
h = mix(h, b)
h = mixLast(h, c)
finalizeHash(h, n)
}
-
- /** Compute a hash for a traversable (once). */
- final def traversableHash[T](xs: collection.GenTraversableOnce[T], seed: Int = traversableSeed): Int = {
+ /** Compute a hash that depends on the order of its arguments.
+ */
+ final def orderedHash(xs: TraversableOnce[Any], seed: Int): Int = {
var n = 0
var h = seed
- xs.seq.foreach { x =>
+ xs foreach { x =>
h = mix(h, x.##)
n += 1
}
finalizeHash(h, n)
}
- /** Compute the hash of an array */
- final def arrayHash[@specialized T](a: Array[T], seed: Int = arraySeed): Int = {
+ /** Compute the hash of an array.
+ */
+ final def arrayHash[@specialized T](a: Array[T], seed: Int): Int = {
var h = seed
var i = 0
while (i < a.length) {
@@ -144,8 +136,9 @@ object MurmurHash3 {
}
/** Compute the hash of a byte array. Faster than arrayHash, because
- * it hashes 4 bytes at once. */
- final def bytesHash(data: Array[Byte], seed: Int = arraySeed): Int = {
+ * it hashes 4 bytes at once.
+ */
+ final def bytesHash(data: Array[Byte], seed: Int): Int = {
var len = data.length
var h = seed
@@ -176,3 +169,67 @@ object MurmurHash3 {
finalizeHash(h, data.length)
}
}
+
+/**
+ * An instance of MurmurHash3 with predefined seeds for various
+ * classes. Used by all the scala collections and case classes.
+ */
+object MurmurHash3 extends MurmurHash3 {
+ final val arraySeed = 0x3c074a61
+ final val stringSeed = 0xf7ca7fd2
+ final val productSeed = 0xcafebabe
+ final val symmetricSeed = 0xb592f7ae
+ final val traversableSeed = 0xe73a8b15
+ final val seqSeed = "Seq".hashCode
+ final val mapSeed = "Map".hashCode
+ final val setSeed = "Set".hashCode
+
+ def arrayHash[@specialized T](a: Array[T]): Int = arrayHash(a, arraySeed)
+ def bytesHash(data: Array[Byte]): Int = bytesHash(data, arraySeed)
+ def orderedHash(xs: TraversableOnce[Any]): Int = orderedHash(xs, symmetricSeed)
+ def productHash(x: Product): Int = productHash(x, productSeed)
+ def stringHash(x: String): Int = stringHash(x, stringSeed)
+ def unorderedHash(xs: TraversableOnce[Any]): Int = unorderedHash(xs, traversableSeed)
+
+ /** To offer some potential for optimization.
+ */
+ def seqHash(xs: collection.Seq[_]): Int = orderedHash(xs, seqSeed)
+ def mapHash(xs: collection.Map[_, _]): Int = unorderedHash(xs, mapSeed)
+ def setHash(xs: collection.Set[_]): Int = unorderedHash(xs, setSeed)
+
+ /** All this trouble and foreach still appears faster.
+ * Leaving in place in case someone would like to investigate further.
+ */
+ /**
+ def linearSeqHash(xs: collection.LinearSeq[_], seed: Int): Int = {
+ var n = 0
+ var h = seed
+ var elems = xs
+ while (elems.nonEmpty) {
+ h = mix(h, elems.head.##)
+ n += 1
+ elems = elems.tail
+ }
+ finalizeHash(h, n)
+ }
+
+ def indexedSeqHash(xs: collection.IndexedSeq[_], seed: Int): Int = {
+ var n = 0
+ var h = seed
+ val len = xs.length
+ while (n < len) {
+ h = mix(h, xs(n).##)
+ n += 1
+ }
+ finalizeHash(h, n)
+ }
+ */
+
+ @deprecated("Use unorderedHash", "2.10.0")
+ final def symmetricHash[T](xs: collection.GenTraversableOnce[T], seed: Int = symmetricSeed): Int =
+ unorderedHash(xs.seq, seed)
+
+ @deprecated("Use orderedHash", "2.10.0")
+ final def traversableHash[T](xs: collection.GenTraversableOnce[T], seed: Int = traversableSeed): Int =
+ orderedHash(xs.seq, seed)
+}