From 4215f6bd7d6895e400062c22d80a7ccfab20ff23 Mon Sep 17 00:00:00 2001 From: Paul Phillips Date: Mon, 11 Oct 2010 23:18:19 +0000 Subject: Introduced -Ymurmur with murmur hashcodes. contributed by "archontophoenix", following in the grand tradition of code by people whose actual names I don't know. References #2537, but it doesn't close until some sensible hashcode is used by default. Review by community. --- .../scala/tools/nsc/settings/ScalaSettings.scala | 2 +- .../tools/nsc/typechecker/SyntheticMethods.scala | 6 +- src/library/scala/runtime/ScalaRunTime.scala | 4 +- src/library/scala/util/MurmurHash2.scala | 128 +++++++++++++++++++++ test/files/run/hashCodeDistribution.flags | 1 + test/files/run/hashCodeDistribution.scala | 17 +++ test/pending/run/hashCodeDistribution.flags | 1 - test/pending/run/hashCodeDistribution.scala | 17 --- 8 files changed, 153 insertions(+), 23 deletions(-) create mode 100644 src/library/scala/util/MurmurHash2.scala create mode 100644 test/files/run/hashCodeDistribution.flags create mode 100644 test/files/run/hashCodeDistribution.scala delete mode 100644 test/pending/run/hashCodeDistribution.flags delete mode 100644 test/pending/run/hashCodeDistribution.scala diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index 47d87e02b0..856c5fdb59 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -145,7 +145,7 @@ trait ScalaSettings extends AbsScalaSettings with StandardScalaSettings { val Yrepldebug = BooleanSetting ("-Yrepl-debug", "Trace all repl activity.") val Ycompletion = BooleanSetting ("-Ycompletion-debug", "Trace all tab completion activity.") val Ypmatnaive = BooleanSetting ("-Ypmat-naive", "Desugar matches as naively as possible..") - // val Yjenkins = BooleanSetting ("-Yjenkins-hashCodes", "Use jenkins hash algorithm for case class generated hashCodes.") + val Ymurmur = BooleanSetting ("-Ymurmur", "Use Murmur hash algorithm for case class generated hashCodes.") val Ynotnull = BooleanSetting ("-Ynotnull", "Enable the experimental and incomplete scala.NotNull") // Warnings diff --git a/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala b/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala index c719f5e546..50e150c211 100644 --- a/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala +++ b/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala @@ -136,8 +136,10 @@ trait SyntheticMethods extends ast.TreeDSL { } } - def hashCodeTarget: Name = nme.hashCode_ - // if (settings.Yjenkins.value) "hashCodeJenkins" else nme.hashCode_ + // XXX short term, expecting to make murmur the default as soon as it is put through some paces. + def hashCodeTarget: Name = + if (settings.Ymurmur.value) "hashCodeMurmur" + else nme.hashCode_ /** The equality method for case modules: * def equals(that: Any) = this eq that diff --git a/src/library/scala/runtime/ScalaRunTime.scala b/src/library/scala/runtime/ScalaRunTime.scala index 8e927bb70b..0dd51a0fc7 100644 --- a/src/library/scala/runtime/ScalaRunTime.scala +++ b/src/library/scala/runtime/ScalaRunTime.scala @@ -146,8 +146,8 @@ object ScalaRunTime { def _toString(x: Product): String = x.productIterator.mkString(x.productPrefix + "(", ",", ")") - // def _hashCodeJenkins(x: Product): Int = - // scala.util.JenkinsHash.hashSeq(x.productPrefix.toSeq ++ x.productIterator.toSeq) + def _hashCodeMurmur(x: Product): Int = + scala.util.MurmurHash.product(x) def _hashCode(x: Product): Int = { val arr = x.productArity diff --git a/src/library/scala/util/MurmurHash2.scala b/src/library/scala/util/MurmurHash2.scala new file mode 100644 index 0000000000..50d530d00c --- /dev/null +++ b/src/library/scala/util/MurmurHash2.scala @@ -0,0 +1,128 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2010, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.util + +/** An implementation of MurmurHash 2.0. + * http://sites.google.com/site/murmurhash/ + * + * It is here primarily for use by case classes. + * + * @author archontophoenix + * @version 2.9 + * @since 2.9 + */ + +object MurmurHash { + private def bytesProvided (v: Any) = v match { + case x: Byte => 1 + case x: Short => 2 + case x: Int => 4 + case x: Long => 8 + case x: Float => 4 + case x: Double => 8 + case x: Boolean => 1 + case x: Char => 2 + case x: Unit => 0 + case _ => 4 + } + + private def highInt (v: Any): Int = { + val l = v match { + case x: Long => x + case x: Double => java.lang.Double.doubleToRawLongBits(x) + case _ => throw new IllegalArgumentException("No highInt: " + v) + } + + l >>> 32 toInt + } + + final val m = 0x5bd1e995 + final val r = 24 + + private def step (hh: Int, kk: Int): Int = { + var h = hh + var k = kk + k *= m + k ^= k >>> r + k *= m + h *= m + h ^ k + } + + def product(p: Product): Int = product(p, 0) + def product(p: Product, seed: Int): Int = { + // Initialize the hash to a 'random' value + var n = p.productArity + var h = seed ^ n + var partial = 0 + var nextPartialByte = 0 + while (n > 0) { + n -= 1 + val el: Any = p.productElement(n) + bytesProvided(el) match { + case 0 => + case 1 => + partial |= el.## << (nextPartialByte * 8) + if (nextPartialByte < 3) + nextPartialByte += 1 + else { + h = step(h,partial) + partial = 0 + nextPartialByte = 0 + } + case 2 => + val i = el.## + partial |= i << (nextPartialByte * 8) + if (nextPartialByte < 2) + nextPartialByte += 2 + else { + h = step(h,partial) + nextPartialByte -= 2 + partial = if (nextPartialByte == 0) 0 else i >>> 8 + } + case 4 => + h = step(h, el.##) + case 8 => + h = step(h, el.##) + h = step(h, highInt(el)) + } + } + // Handle the last few bytes of the input array + if (nextPartialByte > 0) { + h ^= partial + h *= m + } + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >>> 13 + h *= m + h ^ (h >>> 15) + } + + def string(s: String): Int = { + // Initialize the hash to a 'random' value + var n = s.length + var h = n + var nn = n & ~ 1 + while (nn > 0) { + nn -= 2 + h = step(h, (s(nn + 1) << 16) | s(nn)) + } + // Handle the last few bytes of the input array + if ((n & 1) != 0) { + h ^= s(n - 1) + h *= m + } + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >>> 13 + h *= m + h ^ (h >>> 15) + } +} diff --git a/test/files/run/hashCodeDistribution.flags b/test/files/run/hashCodeDistribution.flags new file mode 100644 index 0000000000..5f7bd1a5be --- /dev/null +++ b/test/files/run/hashCodeDistribution.flags @@ -0,0 +1 @@ +-Ymurmur \ No newline at end of file diff --git a/test/files/run/hashCodeDistribution.scala b/test/files/run/hashCodeDistribution.scala new file mode 100644 index 0000000000..5be9d1db6d --- /dev/null +++ b/test/files/run/hashCodeDistribution.scala @@ -0,0 +1,17 @@ +// See ticket #2537. +object Test { + case class C(x: Int, y: Int) { } + val COUNT = 300 + val totalCodes = COUNT * COUNT + + def main (args: Array[String]) = { + val hashCodes = + for (x <- 0 until COUNT; y <- 0 until COUNT) yield C(x,y).hashCode + + val uniques = hashCodes.distinct + val collisionRate = (totalCodes - uniques.size) * 1000 / totalCodes + + assert(collisionRate < 5, "Collision rate too high: %d / 1000".format(collisionRate)) + // println("collisionRate = %d / 1000".format(collisionRate)) + } +} \ No newline at end of file diff --git a/test/pending/run/hashCodeDistribution.flags b/test/pending/run/hashCodeDistribution.flags deleted file mode 100644 index 7806652d4d..0000000000 --- a/test/pending/run/hashCodeDistribution.flags +++ /dev/null @@ -1 +0,0 @@ --Yjenkins-hashCodes \ No newline at end of file diff --git a/test/pending/run/hashCodeDistribution.scala b/test/pending/run/hashCodeDistribution.scala deleted file mode 100644 index 5be9d1db6d..0000000000 --- a/test/pending/run/hashCodeDistribution.scala +++ /dev/null @@ -1,17 +0,0 @@ -// See ticket #2537. -object Test { - case class C(x: Int, y: Int) { } - val COUNT = 300 - val totalCodes = COUNT * COUNT - - def main (args: Array[String]) = { - val hashCodes = - for (x <- 0 until COUNT; y <- 0 until COUNT) yield C(x,y).hashCode - - val uniques = hashCodes.distinct - val collisionRate = (totalCodes - uniques.size) * 1000 / totalCodes - - assert(collisionRate < 5, "Collision rate too high: %d / 1000".format(collisionRate)) - // println("collisionRate = %d / 1000".format(collisionRate)) - } -} \ No newline at end of file -- cgit v1.2.3