summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2011-01-25 19:41:34 +0000
committerPaul Phillips <paulp@improving.org>2011-01-25 19:41:34 +0000
commit5f905da8b66fe69b09d8b62c917970cde14e23ba (patch)
treec1670e8ca407b3e3f8a17fc6c42a87364cbd64e9 /src
parentdea65103bf06c0a91527692ac053d0d2cb9a9ddd (diff)
downloadscala-5f905da8b66fe69b09d8b62c917970cde14e23ba.tar.gz
scala-5f905da8b66fe69b09d8b62c917970cde14e23ba.tar.bz2
scala-5f905da8b66fe69b09d8b62c917970cde14e23ba.zip
A new murmur3 hashcode implementation contribut...
A new murmur3 hashcode implementation contributed by rex kerr. I'm really happy with it. There is benchmarking code included but you can use the pudding for proof: the compiler compiling itself is clearly faster. I deleted the existing murmur2 implementation which has never left trunk in a release. There remain some possible points of improvement with inlining and/or synthetic method generation, but this is a major improvement over the status quo. Closes #2537, review by prokopec.
Diffstat (limited to 'src')
-rw-r--r--src/compiler/scala/tools/nsc/settings/ScalaSettings.scala1
-rw-r--r--src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala7
-rw-r--r--src/compiler/scala/tools/nsc/util/BitSet.scala14
-rw-r--r--src/library/scala/collection/Map.scala3
-rw-r--r--src/library/scala/collection/MapLike.scala4
-rw-r--r--src/library/scala/collection/SeqLike.scala6
-rw-r--r--src/library/scala/collection/Set.scala2
-rw-r--r--src/library/scala/collection/SetLike.scala3
-rw-r--r--src/library/scala/runtime/ScalaRunTime.scala14
-rw-r--r--src/library/scala/util/MurmurHash.scala263
-rw-r--r--src/library/scala/xml/Utility.scala20
11 files changed, 208 insertions, 129 deletions
diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala
index e10abf6a0d..57de25b694 100644
--- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala
+++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala
@@ -138,7 +138,6 @@ trait ScalaSettings extends AbsScalaSettings with StandardScalaSettings {
withPostSetHook(set => interpreter._debug = true)
val Ycompletion = BooleanSetting ("-Ycompletion-debug", "Trace all tab completion activity.")
val Ypmatnaive = BooleanSetting ("-Ypmat-naive", "Desugar matches as naively as possible.")
- val Ymurmur = BooleanSetting ("-Ymurmur", "Use Murmur hash algorithm for case class generated hashCodes.")
val Ynotnull = BooleanSetting ("-Ynotnull", "Enable (experimental and incomplete) scala.NotNull.")
val YdepMethTpes = BooleanSetting ("-Ydependent-method-types", "Allow dependent method types.")
val YmethodInfer = BooleanSetting ("-Yinfer-argument-types", "Infer types for arguments of overriden methods.")
diff --git a/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala b/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala
index 8041b7a715..ac259ab7ff 100644
--- a/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala
+++ b/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala
@@ -135,11 +135,6 @@ trait SyntheticMethods extends ast.TreeDSL {
}
}
- // XXX short term, expecting to make murmur the default as soon as it is put through some paces.
- def hashCodeTarget: Name =
- if (settings.Ymurmur.value) "hashCodeMurmur"
- else nme.hashCode_
-
/** The equality method for case modules:
* def equals(that: Any) = this eq that
*/
@@ -257,7 +252,7 @@ trait SyntheticMethods extends ast.TreeDSL {
// methods for case classes only
def classMethods = List(
- Object_hashCode -> (() => forwardingMethod(nme.hashCode_, "_" + hashCodeTarget)),
+ Object_hashCode -> (() => forwardingMethod(nme.hashCode_, "_" + nme.hashCode_)),
Object_toString -> (() => forwardingMethod(nme.toString_, "_" + nme.toString_)),
Object_equals -> (() => equalsClassMethod)
)
diff --git a/src/compiler/scala/tools/nsc/util/BitSet.scala b/src/compiler/scala/tools/nsc/util/BitSet.scala
index 59d6d51219..a8d54c2c84 100644
--- a/src/compiler/scala/tools/nsc/util/BitSet.scala
+++ b/src/compiler/scala/tools/nsc/util/BitSet.scala
@@ -72,12 +72,20 @@ abstract class BitSet {
}
override def hashCode: Int = {
- var h = hashSeed
+ import scala.util.MurmurHash._
+ var h = startHash(hashSeed)
+ var c = startMagicA
+ var k = startMagicB
for (idx <- 0 until nwords) {
val w = word(idx)
- h = (h * 41 + (w >>> 32).toInt) * 41 + w.toInt
+ h = extendHash(h, (w>>>32).toInt, c, k)
+ c = nextMagicA(c)
+ k = nextMagicB(k)
+ h = extendHash(h, w.toInt, c, k)
+ c = nextMagicA(c)
+ k = nextMagicB(k)
}
- h
+ finalizeHash(h)
}
def addString(sb: StringBuilder, start: String, sep: String, end: String) {
diff --git a/src/library/scala/collection/Map.scala b/src/library/scala/collection/Map.scala
index b3dee2ff8c..c32b04f2ed 100644
--- a/src/library/scala/collection/Map.scala
+++ b/src/library/scala/collection/Map.scala
@@ -35,6 +35,9 @@ trait Map[A, +B] extends Iterable[(A, B)] with MapLike[A, B, Map[A, B]] {
* @define coll map
*/
object Map extends MapFactory[Map] {
+
+ private[collection] val hashSeed = "Map".hashCode
+
def empty[A, B]: immutable.Map[A, B] = immutable.Map.empty
/** $mapCanBuildFromInfo */
diff --git a/src/library/scala/collection/MapLike.scala b/src/library/scala/collection/MapLike.scala
index ce78fb705a..0d012d01b7 100644
--- a/src/library/scala/collection/MapLike.scala
+++ b/src/library/scala/collection/MapLike.scala
@@ -334,7 +334,9 @@ self =>
override /*PartialFunction*/
def toString = super[IterableLike].toString
- override def hashCode() = this map (_.##) sum
+ // This hash code must be symmetric in the contents but ought not
+ // collide trivially.
+ override def hashCode() = util.MurmurHash.symmetricHash(this,Map.hashSeed)
/** Compares two maps structurally; i.e. checks if all mappings
* contained in this map are also contained in the other map,
diff --git a/src/library/scala/collection/SeqLike.scala b/src/library/scala/collection/SeqLike.scala
index 49098da47c..12e4c45c75 100644
--- a/src/library/scala/collection/SeqLike.scala
+++ b/src/library/scala/collection/SeqLike.scala
@@ -987,7 +987,11 @@ trait SeqLike[+A, +Repr] extends IterableLike[A, Repr] { self =>
/** Hashcodes for $Coll produce a value from the hashcodes of all the
* elements of the $coll.
*/
- override def hashCode() = (Seq.hashSeed /: this)(_ * 41 + _.##)
+ override def hashCode() = {
+ val h = new util.MurmurHash[A](Seq.hashSeed)
+ this.foreach(h)
+ h.hash
+ }
/** The equals method for arbitrary sequences. Compares this sequence to
* some other object.
diff --git a/src/library/scala/collection/Set.scala b/src/library/scala/collection/Set.scala
index b012836a78..412ebf0e25 100644
--- a/src/library/scala/collection/Set.scala
+++ b/src/library/scala/collection/Set.scala
@@ -36,6 +36,8 @@ trait Set[A] extends (A => Boolean)
* @define Coll Set
*/
object Set extends SetFactory[Set] {
+ private[collection] val hashSeed = "Set".hashCode
+
def newBuilder[A] = immutable.Set.newBuilder[A]
override def empty[A]: Set[A] = immutable.Set.empty[A]
implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, Set[A]] = setCanBuildFrom[A]
diff --git a/src/library/scala/collection/SetLike.scala b/src/library/scala/collection/SetLike.scala
index 71c12be778..73f0b9c839 100644
--- a/src/library/scala/collection/SetLike.scala
+++ b/src/library/scala/collection/SetLike.scala
@@ -290,7 +290,8 @@ self =>
// override def hashCode() = this map (_.hashCode) sum
// Calling map on a set drops duplicates: any hashcode collisions would
// then be dropped before they can be added.
- override def hashCode() = this.foldLeft(0)(_ + _.hashCode)
+ // Hash should be symmetric in set entries, but without trivial collisions.
+ override def hashCode() = util.MurmurHash.symmetricHash(this,Set.hashSeed)
/** Compares this set with another object for equality.
*
diff --git a/src/library/scala/runtime/ScalaRunTime.scala b/src/library/scala/runtime/ScalaRunTime.scala
index 956147ab4a..7a7afbcd88 100644
--- a/src/library/scala/runtime/ScalaRunTime.scala
+++ b/src/library/scala/runtime/ScalaRunTime.scala
@@ -157,19 +157,21 @@ object ScalaRunTime {
def _toString(x: Product): String =
x.productIterator.mkString(x.productPrefix + "(", ",", ")")
- def _hashCodeMurmur(x: Product): Int =
- scala.util.MurmurHash.product(x)
-
def _hashCode(x: Product): Int = {
+ import scala.util.MurmurHash._
val arr = x.productArity
- var code = arr
+ var h = startHash(arr)
+ var c = startMagicA
+ var k = startMagicB
var i = 0
while (i < arr) {
val elem = x.productElement(i)
- code = code * 41 + (if (elem == null) 0 else elem.##)
+ h = extendHash(h, if (elem == null) 0 else elem.##, c, k)
+ c = nextMagicA(c)
+ k = nextMagicB(k)
i += 1
}
- code
+ finalizeHash(h)
}
/** Fast path equality method for inlining; used when -optimise is set.
diff --git a/src/library/scala/util/MurmurHash.scala b/src/library/scala/util/MurmurHash.scala
index 8ed91796a1..b6efde6c89 100644
--- a/src/library/scala/util/MurmurHash.scala
+++ b/src/library/scala/util/MurmurHash.scala
@@ -8,121 +8,188 @@
package scala.util
-/** An implementation of MurmurHash 2.0.
- * http://sites.google.com/site/murmurhash/
+/** An implementation of Austin Appleby's MurmurHash 3.0 algorithm
+ * (32 bit version); reference: http://code.google.com/p/smhasher
*
- * It is here primarily for use by case classes.
+ * This is the hash used by collections and case classes (including
+ * tuples).
*
- * @author archontophoenix
+ * @author Rex Kerr
* @version 2.9
* @since 2.9
*/
-object MurmurHash {
- private def bytesProvided (v: Any) = v match {
- case x: Byte => 1
- case x: Short => 2
- case x: Int => 4
- case x: Long => 8
- case x: Float => 4
- case x: Double => 8
- case x: Boolean => 1
- case x: Char => 2
- case x: Unit => 0
- case _ => 4
+import java.lang.Integer.{ rotateLeft => rotl }
+
+/** A class designed to generate well-distributed non-cryptographic
+ * hashes. It is designed to be passed to a collection's foreach method,
+ * or can take individual hash values with append. Its own hash code is
+ * set equal to the hash code of whatever it is hashing.
+ */
+class MurmurHash[@specialized(Int,Long,Float,Double) T](seed: Int) extends (T => Unit) {
+ import MurmurHash._
+
+ private var h = startHash(seed)
+ private var c = hiddenMagicA
+ private var k = hiddenMagicB
+ private var hashed = false
+ private var hashvalue = h
+
+ /** Begin a new hash using the same seed. */
+ def reset() {
+ h = startHash(seed)
+ c = hiddenMagicA
+ k = hiddenMagicB
+ hashed = false
+ }
+
+ /** Incorporate the hash value of one item. */
+ def apply(t: T) {
+ h = extendHash(h,t.##,c,k)
+ c = nextMagicA(c)
+ k = nextMagicB(k)
+ hashed = false
}
- private def highInt (v: Any): Int = {
- val l = v match {
- case x: Long => x
- case x: Double => java.lang.Double.doubleToRawLongBits(x)
- case _ => throw new IllegalArgumentException("No highInt: " + v)
+ /** Incorporate a known hash value. */
+ def append(i: Int) {
+ h = extendHash(h,i,c,k)
+ c = nextMagicA(c)
+ k = nextMagicB(k)
+ hashed = false
+ }
+
+ /** Retrieve the hash value */
+ def hash = {
+ if (!hashed) {
+ hashvalue = finalizeHash(h)
+ hashed = true
}
+ hashvalue
+ }
+ override def hashCode = hash
+}
- l >>> 32 toInt
+/** An object designed to generate well-distributed non-cryptographic
+ * hashes. It is designed to hash a collection of integers; along with
+ * the integers to hash, it generates two magic streams of integers to
+ * increase the distribution of repetitive input sequences. Thus,
+ * three methods need to be called at each step (to start and to
+ * incorporate a new integer) to update the values. Only one method
+ * needs to be called to finalize the hash.
+ */
+
+object MurmurHash {
+ // Magic values used for MurmurHash's 32 bit hash.
+ // Don't change these without consulting a hashing expert!
+ final private val visibleMagic = 0x971e137b
+ final private val hiddenMagicA = 0x95543787
+ final private val hiddenMagicB = 0x2ad7eb25
+ final private val visibleMixer = 0x52dce729
+ final private val hiddenMixerA = 0x7b7d159c
+ final private val hiddenMixerB = 0x6bce6396
+ final private val finalMixer1 = 0x85ebca6b
+ final private val finalMixer2 = 0xc2b2ae35
+
+ // Arbitrary values used for hashing certain classes
+ final private val seedString = 0xf7ca7fd2
+ final private val seedArray = 0x3c074a61
+
+ /** The first 23 magic integers from the first stream are stored here */
+ val storedMagicA =
+ Iterator.iterate(hiddenMagicA)(nextMagicA).take(23).toArray
+
+ /** The first 23 magic integers from the second stream are stored here */
+ val storedMagicB =
+ Iterator.iterate(hiddenMagicB)(nextMagicB).take(23).toArray
+
+ /** Begin a new hash with a seed value. */
+ def startHash(seed: Int) = seed ^ visibleMagic
+
+ /** The initial magic integers in the first stream. */
+ def startMagicA = hiddenMagicA
+
+ /** The initial magic integer in the second stream. */
+ def startMagicB = hiddenMagicB
+
+ /** Incorporates a new value into an existing hash.
+ *
+ * @param hash the prior hash value
+ * @param value the new value to incorporate
+ * @param magicA a magic integer from the stream
+ * @param magicB a magic integer from a different stream
+ * @return the updated hash value
+ */
+ def extendHash(hash: Int, value: Int, magicA: Int, magicB: Int) = {
+ (hash ^ rotl(value*magicA,11)*magicB)*3 + visibleMixer
}
- final val m = 0x5bd1e995
- final val r = 24
-
- private def step (hh: Int, kk: Int): Int = {
- var h = hh
- var k = kk
- k *= m
- k ^= k >>> r
- k *= m
- h *= m
- h ^ k
+ /** Given a magic integer from the first stream, compute the next */
+ def nextMagicA(magicA: Int) = magicA*5 + hiddenMixerA
+
+ /** Given a magic integer from the second stream, compute the next */
+ def nextMagicB(magicB: Int) = magicB*5 + hiddenMixerB
+
+ /** Once all hashes have been incorporated, this performs a final mixing */
+ def finalizeHash(hash: Int) = {
+ var i = (hash ^ (hash>>>16))
+ i *= finalMixer1
+ i ^= (i >>> 13)
+ i *= finalMixer2
+ i ^= (i >>> 16)
+ i
}
- def product(p: Product): Int = product(p, 0)
- def product(p: Product, seed: Int): Int = {
- // Initialize the hash to a 'random' value
- var n = p.productArity
- var h = seed ^ n
- var partial = 0
- var nextPartialByte = 0
- while (n > 0) {
- n -= 1
- val el: Any = p.productElement(n)
- bytesProvided(el) match {
- case 0 =>
- case 1 =>
- partial |= el.## << (nextPartialByte * 8)
- if (nextPartialByte < 3)
- nextPartialByte += 1
- else {
- h = step(h,partial)
- partial = 0
- nextPartialByte = 0
- }
- case 2 =>
- val i = el.##
- partial |= i << (nextPartialByte * 8)
- if (nextPartialByte < 2)
- nextPartialByte += 2
- else {
- h = step(h,partial)
- nextPartialByte -= 2
- partial = if (nextPartialByte == 0) 0 else i >>> 8
- }
- case 4 =>
- h = step(h, el.##)
- case 8 =>
- h = step(h, el.##)
- h = step(h, highInt(el))
- }
- }
- // Handle the last few bytes of the input array
- if (nextPartialByte > 0) {
- h ^= partial
- h *= m
+ /** Compute a high-quality hash of an array */
+ def arrayHash[@specialized T](a: Array[T]) = {
+ var h = startHash(a.length * seedArray)
+ var c = hiddenMagicA
+ var k = hiddenMagicB
+ var j = 0
+ while (j < a.length) {
+ h = extendHash(h, a(j).##, c, k)
+ c = nextMagicA(c)
+ k = nextMagicB(k)
+ j += 1
}
- // Do a few final mixes of the hash to ensure the last few
- // bytes are well-incorporated.
- h ^= h >>> 13
- h *= m
- h ^ (h >>> 15)
+ finalizeHash(h)
}
- def string(s: String): Int = {
- // Initialize the hash to a 'random' value
- var n = s.length
- var h = n
- var nn = n & ~ 1
- while (nn > 0) {
- nn -= 2
- h = step(h, (s(nn + 1) << 16) | s(nn))
+ /** Compute a high-quality hash of a string */
+ def stringHash(s: String) = {
+ var h = startHash(s.length * seedString)
+ var c = hiddenMagicA
+ var k = hiddenMagicB
+ var j = 0
+ while (j+1 < s.length) {
+ val i = (s.charAt(j)<<16) + s.charAt(j+1);
+ h = extendHash(h,i,c,k)
+ c = nextMagicA(c)
+ k = nextMagicB(k)
+ j += 2
}
- // Handle the last few bytes of the input array
- if ((n & 1) != 0) {
- h ^= s(n - 1)
- h *= m
- }
- // Do a few final mixes of the hash to ensure the last few
- // bytes are well-incorporated.
- h ^= h >>> 13
- h *= m
- h ^ (h >>> 15)
+ if (j < s.length) h = extendHash(h,s.charAt(j),c,k)
+ finalizeHash(h)
+ }
+
+ /** Compute a hash that is symmetric in its arguments--that is,
+ * where the order of appearance of elements does not matter.
+ * This is useful for hashing sets, for example.
+ */
+ def symmetricHash[T](xs: TraversableOnce[T], seed: Int) = {
+ var a,b,n = 0
+ var c = 1
+ xs.foreach(i => {
+ val h = i.##
+ a += h
+ b ^= h
+ if (h != 0) c *= h
+ n += 1
+ })
+ var h = startHash(seed * n)
+ h = extendHash(h, a, storedMagicA(0), storedMagicB(0))
+ h = extendHash(h, b, storedMagicA(1), storedMagicB(1))
+ h = extendHash(h, c, storedMagicA(2), storedMagicB(2))
+ finalizeHash(h)
}
}
diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala
index 02eeb4e84b..2d0682a47d 100644
--- a/src/library/scala/xml/Utility.scala
+++ b/src/library/scala/xml/Utility.scala
@@ -260,18 +260,14 @@ object Utility extends AnyRef with parsing.TokenTests
* @param attribHashCode
* @param children
*/
- def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = (
- ( if(pre ne null) {41 * pre.## % 7} else {0})
- + label.## * 53
- + attribHashCode * 7
- + scpeHash * 31
- + {
- var c = 0
- val i = children.iterator
- while(i.hasNext) c = c * 41 + i.next.##
- c
- }
- )
+ def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = {
+ val h = new util.MurmurHash[Node]( if (pre ne null) pre.## else 0 )
+ h.append(label.##)
+ h.append(attribHashCode)
+ h.append(scpeHash)
+ children.foreach(h)
+ h.hash
+ }
def appendQuoted(s: String): String = sbToString(appendQuoted(s, _))