diff options
-rw-r--r-- | src/compiler/scala/tools/nsc/settings/ScalaSettings.scala | 1 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala | 7 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/util/BitSet.scala | 14 | ||||
-rw-r--r-- | src/library/scala/collection/Map.scala | 3 | ||||
-rw-r--r-- | src/library/scala/collection/MapLike.scala | 4 | ||||
-rw-r--r-- | src/library/scala/collection/SeqLike.scala | 6 | ||||
-rw-r--r-- | src/library/scala/collection/Set.scala | 2 | ||||
-rw-r--r-- | src/library/scala/collection/SetLike.scala | 3 | ||||
-rw-r--r-- | src/library/scala/runtime/ScalaRunTime.scala | 14 | ||||
-rw-r--r-- | src/library/scala/util/MurmurHash.scala | 263 | ||||
-rw-r--r-- | src/library/scala/xml/Utility.scala | 20 | ||||
-rw-r--r-- | test/benchmarks/src/scala/util/HashSpeedTest.scala | 241 | ||||
-rw-r--r-- | test/files/run/hashCodeDistribution.flags | 1 |
13 files changed, 449 insertions, 130 deletions
diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index e10abf6a0d..57de25b694 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -138,7 +138,6 @@ trait ScalaSettings extends AbsScalaSettings with StandardScalaSettings { withPostSetHook(set => interpreter._debug = true) val Ycompletion = BooleanSetting ("-Ycompletion-debug", "Trace all tab completion activity.") val Ypmatnaive = BooleanSetting ("-Ypmat-naive", "Desugar matches as naively as possible.") - val Ymurmur = BooleanSetting ("-Ymurmur", "Use Murmur hash algorithm for case class generated hashCodes.") val Ynotnull = BooleanSetting ("-Ynotnull", "Enable (experimental and incomplete) scala.NotNull.") val YdepMethTpes = BooleanSetting ("-Ydependent-method-types", "Allow dependent method types.") val YmethodInfer = BooleanSetting ("-Yinfer-argument-types", "Infer types for arguments of overriden methods.") diff --git a/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala b/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala index 8041b7a715..ac259ab7ff 100644 --- a/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala +++ b/src/compiler/scala/tools/nsc/typechecker/SyntheticMethods.scala @@ -135,11 +135,6 @@ trait SyntheticMethods extends ast.TreeDSL { } } - // XXX short term, expecting to make murmur the default as soon as it is put through some paces. - def hashCodeTarget: Name = - if (settings.Ymurmur.value) "hashCodeMurmur" - else nme.hashCode_ - /** The equality method for case modules: * def equals(that: Any) = this eq that */ @@ -257,7 +252,7 @@ trait SyntheticMethods extends ast.TreeDSL { // methods for case classes only def classMethods = List( - Object_hashCode -> (() => forwardingMethod(nme.hashCode_, "_" + hashCodeTarget)), + Object_hashCode -> (() => forwardingMethod(nme.hashCode_, "_" + nme.hashCode_)), Object_toString -> (() => forwardingMethod(nme.toString_, "_" + nme.toString_)), Object_equals -> (() => equalsClassMethod) ) diff --git a/src/compiler/scala/tools/nsc/util/BitSet.scala b/src/compiler/scala/tools/nsc/util/BitSet.scala index 59d6d51219..a8d54c2c84 100644 --- a/src/compiler/scala/tools/nsc/util/BitSet.scala +++ b/src/compiler/scala/tools/nsc/util/BitSet.scala @@ -72,12 +72,20 @@ abstract class BitSet { } override def hashCode: Int = { - var h = hashSeed + import scala.util.MurmurHash._ + var h = startHash(hashSeed) + var c = startMagicA + var k = startMagicB for (idx <- 0 until nwords) { val w = word(idx) - h = (h * 41 + (w >>> 32).toInt) * 41 + w.toInt + h = extendHash(h, (w>>>32).toInt, c, k) + c = nextMagicA(c) + k = nextMagicB(k) + h = extendHash(h, w.toInt, c, k) + c = nextMagicA(c) + k = nextMagicB(k) } - h + finalizeHash(h) } def addString(sb: StringBuilder, start: String, sep: String, end: String) { diff --git a/src/library/scala/collection/Map.scala b/src/library/scala/collection/Map.scala index b3dee2ff8c..c32b04f2ed 100644 --- a/src/library/scala/collection/Map.scala +++ b/src/library/scala/collection/Map.scala @@ -35,6 +35,9 @@ trait Map[A, +B] extends Iterable[(A, B)] with MapLike[A, B, Map[A, B]] { * @define coll map */ object Map extends MapFactory[Map] { + + private[collection] val hashSeed = "Map".hashCode + def empty[A, B]: immutable.Map[A, B] = immutable.Map.empty /** $mapCanBuildFromInfo */ diff --git a/src/library/scala/collection/MapLike.scala b/src/library/scala/collection/MapLike.scala index ce78fb705a..0d012d01b7 100644 --- a/src/library/scala/collection/MapLike.scala +++ b/src/library/scala/collection/MapLike.scala @@ -334,7 +334,9 @@ self => override /*PartialFunction*/ def toString = super[IterableLike].toString - override def hashCode() = this map (_.##) sum + // This hash code must be symmetric in the contents but ought not + // collide trivially. + override def hashCode() = util.MurmurHash.symmetricHash(this,Map.hashSeed) /** Compares two maps structurally; i.e. checks if all mappings * contained in this map are also contained in the other map, diff --git a/src/library/scala/collection/SeqLike.scala b/src/library/scala/collection/SeqLike.scala index 49098da47c..12e4c45c75 100644 --- a/src/library/scala/collection/SeqLike.scala +++ b/src/library/scala/collection/SeqLike.scala @@ -987,7 +987,11 @@ trait SeqLike[+A, +Repr] extends IterableLike[A, Repr] { self => /** Hashcodes for $Coll produce a value from the hashcodes of all the * elements of the $coll. */ - override def hashCode() = (Seq.hashSeed /: this)(_ * 41 + _.##) + override def hashCode() = { + val h = new util.MurmurHash[A](Seq.hashSeed) + this.foreach(h) + h.hash + } /** The equals method for arbitrary sequences. Compares this sequence to * some other object. diff --git a/src/library/scala/collection/Set.scala b/src/library/scala/collection/Set.scala index b012836a78..412ebf0e25 100644 --- a/src/library/scala/collection/Set.scala +++ b/src/library/scala/collection/Set.scala @@ -36,6 +36,8 @@ trait Set[A] extends (A => Boolean) * @define Coll Set */ object Set extends SetFactory[Set] { + private[collection] val hashSeed = "Set".hashCode + def newBuilder[A] = immutable.Set.newBuilder[A] override def empty[A]: Set[A] = immutable.Set.empty[A] implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, Set[A]] = setCanBuildFrom[A] diff --git a/src/library/scala/collection/SetLike.scala b/src/library/scala/collection/SetLike.scala index 71c12be778..73f0b9c839 100644 --- a/src/library/scala/collection/SetLike.scala +++ b/src/library/scala/collection/SetLike.scala @@ -290,7 +290,8 @@ self => // override def hashCode() = this map (_.hashCode) sum // Calling map on a set drops duplicates: any hashcode collisions would // then be dropped before they can be added. - override def hashCode() = this.foldLeft(0)(_ + _.hashCode) + // Hash should be symmetric in set entries, but without trivial collisions. + override def hashCode() = util.MurmurHash.symmetricHash(this,Set.hashSeed) /** Compares this set with another object for equality. * diff --git a/src/library/scala/runtime/ScalaRunTime.scala b/src/library/scala/runtime/ScalaRunTime.scala index 956147ab4a..7a7afbcd88 100644 --- a/src/library/scala/runtime/ScalaRunTime.scala +++ b/src/library/scala/runtime/ScalaRunTime.scala @@ -157,19 +157,21 @@ object ScalaRunTime { def _toString(x: Product): String = x.productIterator.mkString(x.productPrefix + "(", ",", ")") - def _hashCodeMurmur(x: Product): Int = - scala.util.MurmurHash.product(x) - def _hashCode(x: Product): Int = { + import scala.util.MurmurHash._ val arr = x.productArity - var code = arr + var h = startHash(arr) + var c = startMagicA + var k = startMagicB var i = 0 while (i < arr) { val elem = x.productElement(i) - code = code * 41 + (if (elem == null) 0 else elem.##) + h = extendHash(h, if (elem == null) 0 else elem.##, c, k) + c = nextMagicA(c) + k = nextMagicB(k) i += 1 } - code + finalizeHash(h) } /** Fast path equality method for inlining; used when -optimise is set. diff --git a/src/library/scala/util/MurmurHash.scala b/src/library/scala/util/MurmurHash.scala index 8ed91796a1..b6efde6c89 100644 --- a/src/library/scala/util/MurmurHash.scala +++ b/src/library/scala/util/MurmurHash.scala @@ -8,121 +8,188 @@ package scala.util -/** An implementation of MurmurHash 2.0. - * http://sites.google.com/site/murmurhash/ +/** An implementation of Austin Appleby's MurmurHash 3.0 algorithm + * (32 bit version); reference: http://code.google.com/p/smhasher * - * It is here primarily for use by case classes. + * This is the hash used by collections and case classes (including + * tuples). * - * @author archontophoenix + * @author Rex Kerr * @version 2.9 * @since 2.9 */ -object MurmurHash { - private def bytesProvided (v: Any) = v match { - case x: Byte => 1 - case x: Short => 2 - case x: Int => 4 - case x: Long => 8 - case x: Float => 4 - case x: Double => 8 - case x: Boolean => 1 - case x: Char => 2 - case x: Unit => 0 - case _ => 4 +import java.lang.Integer.{ rotateLeft => rotl } + +/** A class designed to generate well-distributed non-cryptographic + * hashes. It is designed to be passed to a collection's foreach method, + * or can take individual hash values with append. Its own hash code is + * set equal to the hash code of whatever it is hashing. + */ +class MurmurHash[@specialized(Int,Long,Float,Double) T](seed: Int) extends (T => Unit) { + import MurmurHash._ + + private var h = startHash(seed) + private var c = hiddenMagicA + private var k = hiddenMagicB + private var hashed = false + private var hashvalue = h + + /** Begin a new hash using the same seed. */ + def reset() { + h = startHash(seed) + c = hiddenMagicA + k = hiddenMagicB + hashed = false + } + + /** Incorporate the hash value of one item. */ + def apply(t: T) { + h = extendHash(h,t.##,c,k) + c = nextMagicA(c) + k = nextMagicB(k) + hashed = false } - private def highInt (v: Any): Int = { - val l = v match { - case x: Long => x - case x: Double => java.lang.Double.doubleToRawLongBits(x) - case _ => throw new IllegalArgumentException("No highInt: " + v) + /** Incorporate a known hash value. */ + def append(i: Int) { + h = extendHash(h,i,c,k) + c = nextMagicA(c) + k = nextMagicB(k) + hashed = false + } + + /** Retrieve the hash value */ + def hash = { + if (!hashed) { + hashvalue = finalizeHash(h) + hashed = true } + hashvalue + } + override def hashCode = hash +} - l >>> 32 toInt +/** An object designed to generate well-distributed non-cryptographic + * hashes. It is designed to hash a collection of integers; along with + * the integers to hash, it generates two magic streams of integers to + * increase the distribution of repetitive input sequences. Thus, + * three methods need to be called at each step (to start and to + * incorporate a new integer) to update the values. Only one method + * needs to be called to finalize the hash. + */ + +object MurmurHash { + // Magic values used for MurmurHash's 32 bit hash. + // Don't change these without consulting a hashing expert! + final private val visibleMagic = 0x971e137b + final private val hiddenMagicA = 0x95543787 + final private val hiddenMagicB = 0x2ad7eb25 + final private val visibleMixer = 0x52dce729 + final private val hiddenMixerA = 0x7b7d159c + final private val hiddenMixerB = 0x6bce6396 + final private val finalMixer1 = 0x85ebca6b + final private val finalMixer2 = 0xc2b2ae35 + + // Arbitrary values used for hashing certain classes + final private val seedString = 0xf7ca7fd2 + final private val seedArray = 0x3c074a61 + + /** The first 23 magic integers from the first stream are stored here */ + val storedMagicA = + Iterator.iterate(hiddenMagicA)(nextMagicA).take(23).toArray + + /** The first 23 magic integers from the second stream are stored here */ + val storedMagicB = + Iterator.iterate(hiddenMagicB)(nextMagicB).take(23).toArray + + /** Begin a new hash with a seed value. */ + def startHash(seed: Int) = seed ^ visibleMagic + + /** The initial magic integers in the first stream. */ + def startMagicA = hiddenMagicA + + /** The initial magic integer in the second stream. */ + def startMagicB = hiddenMagicB + + /** Incorporates a new value into an existing hash. + * + * @param hash the prior hash value + * @param value the new value to incorporate + * @param magicA a magic integer from the stream + * @param magicB a magic integer from a different stream + * @return the updated hash value + */ + def extendHash(hash: Int, value: Int, magicA: Int, magicB: Int) = { + (hash ^ rotl(value*magicA,11)*magicB)*3 + visibleMixer } - final val m = 0x5bd1e995 - final val r = 24 - - private def step (hh: Int, kk: Int): Int = { - var h = hh - var k = kk - k *= m - k ^= k >>> r - k *= m - h *= m - h ^ k + /** Given a magic integer from the first stream, compute the next */ + def nextMagicA(magicA: Int) = magicA*5 + hiddenMixerA + + /** Given a magic integer from the second stream, compute the next */ + def nextMagicB(magicB: Int) = magicB*5 + hiddenMixerB + + /** Once all hashes have been incorporated, this performs a final mixing */ + def finalizeHash(hash: Int) = { + var i = (hash ^ (hash>>>16)) + i *= finalMixer1 + i ^= (i >>> 13) + i *= finalMixer2 + i ^= (i >>> 16) + i } - def product(p: Product): Int = product(p, 0) - def product(p: Product, seed: Int): Int = { - // Initialize the hash to a 'random' value - var n = p.productArity - var h = seed ^ n - var partial = 0 - var nextPartialByte = 0 - while (n > 0) { - n -= 1 - val el: Any = p.productElement(n) - bytesProvided(el) match { - case 0 => - case 1 => - partial |= el.## << (nextPartialByte * 8) - if (nextPartialByte < 3) - nextPartialByte += 1 - else { - h = step(h,partial) - partial = 0 - nextPartialByte = 0 - } - case 2 => - val i = el.## - partial |= i << (nextPartialByte * 8) - if (nextPartialByte < 2) - nextPartialByte += 2 - else { - h = step(h,partial) - nextPartialByte -= 2 - partial = if (nextPartialByte == 0) 0 else i >>> 8 - } - case 4 => - h = step(h, el.##) - case 8 => - h = step(h, el.##) - h = step(h, highInt(el)) - } - } - // Handle the last few bytes of the input array - if (nextPartialByte > 0) { - h ^= partial - h *= m + /** Compute a high-quality hash of an array */ + def arrayHash[@specialized T](a: Array[T]) = { + var h = startHash(a.length * seedArray) + var c = hiddenMagicA + var k = hiddenMagicB + var j = 0 + while (j < a.length) { + h = extendHash(h, a(j).##, c, k) + c = nextMagicA(c) + k = nextMagicB(k) + j += 1 } - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - h ^= h >>> 13 - h *= m - h ^ (h >>> 15) + finalizeHash(h) } - def string(s: String): Int = { - // Initialize the hash to a 'random' value - var n = s.length - var h = n - var nn = n & ~ 1 - while (nn > 0) { - nn -= 2 - h = step(h, (s(nn + 1) << 16) | s(nn)) + /** Compute a high-quality hash of a string */ + def stringHash(s: String) = { + var h = startHash(s.length * seedString) + var c = hiddenMagicA + var k = hiddenMagicB + var j = 0 + while (j+1 < s.length) { + val i = (s.charAt(j)<<16) + s.charAt(j+1); + h = extendHash(h,i,c,k) + c = nextMagicA(c) + k = nextMagicB(k) + j += 2 } - // Handle the last few bytes of the input array - if ((n & 1) != 0) { - h ^= s(n - 1) - h *= m - } - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - h ^= h >>> 13 - h *= m - h ^ (h >>> 15) + if (j < s.length) h = extendHash(h,s.charAt(j),c,k) + finalizeHash(h) + } + + /** Compute a hash that is symmetric in its arguments--that is, + * where the order of appearance of elements does not matter. + * This is useful for hashing sets, for example. + */ + def symmetricHash[T](xs: TraversableOnce[T], seed: Int) = { + var a,b,n = 0 + var c = 1 + xs.foreach(i => { + val h = i.## + a += h + b ^= h + if (h != 0) c *= h + n += 1 + }) + var h = startHash(seed * n) + h = extendHash(h, a, storedMagicA(0), storedMagicB(0)) + h = extendHash(h, b, storedMagicA(1), storedMagicB(1)) + h = extendHash(h, c, storedMagicA(2), storedMagicB(2)) + finalizeHash(h) } } diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala index 02eeb4e84b..2d0682a47d 100644 --- a/src/library/scala/xml/Utility.scala +++ b/src/library/scala/xml/Utility.scala @@ -260,18 +260,14 @@ object Utility extends AnyRef with parsing.TokenTests * @param attribHashCode * @param children */ - def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = ( - ( if(pre ne null) {41 * pre.## % 7} else {0}) - + label.## * 53 - + attribHashCode * 7 - + scpeHash * 31 - + { - var c = 0 - val i = children.iterator - while(i.hasNext) c = c * 41 + i.next.## - c - } - ) + def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = { + val h = new util.MurmurHash[Node]( if (pre ne null) pre.## else 0 ) + h.append(label.##) + h.append(attribHashCode) + h.append(scpeHash) + children.foreach(h) + h.hash + } def appendQuoted(s: String): String = sbToString(appendQuoted(s, _)) diff --git a/test/benchmarks/src/scala/util/HashSpeedTest.scala b/test/benchmarks/src/scala/util/HashSpeedTest.scala new file mode 100644 index 0000000000..615bd47aa2 --- /dev/null +++ b/test/benchmarks/src/scala/util/HashSpeedTest.scala @@ -0,0 +1,241 @@ +object HashSpeedTest { + import System.{nanoTime => now} + + def time[A](f: => A) = { + val t0 = now + val ans = f + (ans, now - t0) + } + def ptime[A](f: => A) = { + val (ans,dt) = time(f) + printf("Elapsed: %.3f\n",dt*1e-9) + ans + } + + object HashHist { + var enabled = true + val counts = new collection.mutable.HashMap[Int,Int] + def add (i: Int) { if (enabled) counts(i) = counts.get(i).getOrElse(0)+1 } + def resultAndReset = { + var s = 0L + var o = 0L + var m = 0 + counts.valuesIterator.foreach(i => { + s += i + if (i>0) o += 1 + if (i>m) m = i + }) + counts.clear + (s,o,m) + } + } + + def report(s: String,res: (Long,Long,Int)) { + println("Hash quality of "+s) + printf(" %5.2f%% of entries are collisions\n",100*(res._1 - res._2).toDouble/res._1) + printf(" Max of %d entries mapped to the same value\n",res._3) + } + + // If you have MurmurHash3 installed, uncomment below (and in main) + import scala.util.{MurmurHash3 => MH3} + val justCountString: String => Unit = str => { + var s,i = 0 + while (i < str.length) { s += str.charAt(i); i += 1 } + HashHist.add(s) + } + val defaultHashString: String => Unit = str => HashHist.add(str.hashCode) + val murmurHashString: String => Unit = str => HashHist.add(MH3.stringHash(str)) + def makeCharStrings = { + val a = new Array[Byte](4) + val buffer = new collection.mutable.ArrayBuffer[String] + var i: Int = 'A' + while (i <= 'Z') { + a(0) = (i&0xFF).toByte + var j: Int = 'a' + while (j <= 'z') { + a(1) = (j&0xFF).toByte + var k: Int = 'A' + while (k <= 'z') { + a(2) = (k&0xFF).toByte + var l: Int = 'A' + while (l <= 'z') { + a(3) = (l&0xFF).toByte + buffer += new String(a) + l += 1 + } + k += 1 + } + j += 1 + } + i += 1 + } + buffer.toArray + } + def hashCharStrings(ss: Array[String], hash: String => Unit) { + var i = 0 + while (i < ss.length) { + hash(ss(i)) + i += 1 + } + } + + def justCountList: List[List[Int]] => Unit = lli => { + var s = 0 + lli.foreach(_.foreach(s += _)) + HashHist.add(s) + } + def defaultHashList: List[List[Int]] => Unit = lli => HashHist.add(lli.hashCode) + def makeBinaryLists = { + def singleLists(depth: Int): List[List[Int]] = { + if (depth <= 0) List(Nil) + else { + val set = singleLists(depth-1) + val longest = set filter (_.length == depth-1) + set ::: (longest.map(0 :: _)) ::: (longest.map(1 :: _)) + } + } + val buffer = new collection.mutable.ArrayBuffer[List[List[Int]]] + val blocks = singleLists(4).toArray + buffer += List(Nil) + var i = 0 + while (i < blocks.length) { + val li = blocks(i) :: Nil + buffer += li + var j = 0 + while (j < blocks.length) { + val lj = blocks(j) :: li + buffer += lj + var k = 0 + while (k < blocks.length) { + val lk = blocks(k) :: lj + buffer += lk + var l = 0 + while (l < blocks.length) { + val ll = blocks(l) :: lk + buffer += ll + l += 1 + } + k += 1 + } + j += 1 + } + i += 1 + } + buffer.toArray + } + def hashBinaryLists(ls: Array[List[List[Int]]], hash: List[List[Int]] => Unit) { + var i = 0 + while (i < ls.length) { + hash(ls(i)) + i += 1 + } + } + + def justCountSets: Set[Int] => Unit = si => { + var s = 0 + si.foreach(s += _) + HashHist.add(s) + } + def defaultHashSets: Set[Int] => Unit = si => HashHist.add(si.hashCode) + def makeIntSets = { + def sets(depth: Int): List[Set[Int]] = { + if (depth <= 0) List(Set.empty[Int]) + else { + val set = sets(depth-1) + set ::: set.map(_ + depth) + } + } + sets(20).toArray + } + def hashIntSets(ss: Array[Set[Int]], hash: Set[Int] => Unit) { + var i = 0 + while (i < ss.length) { + hash(ss(i)) + i += 1 + } + } + + def defaultHashTuples: (Product with Serializable) => Unit = p => HashHist.add(p.hashCode) + def makeNestedTuples = { + val basic = Array( + (0,0), + (0,1), + (1,0), + (1,1), + (0,0,0), + (0,0,1), + (0,1,0), + (1,0,0), + (0,0,0,0), + (0,0,0,0,0), + (false,false), + (true,false), + (false,true), + (true,true), + (0.7,true,"fish"), + ((),true,'c',400,9.2,"galactic") + ) + basic ++ + (for (i <- basic; j <- basic) yield (i,j)) ++ + (for (i <- basic; j <- basic; k <- basic) yield (i,j,k)) ++ + (for (i <- basic; j <- basic; k <- basic) yield ((i,j),k)) ++ + (for (i <- basic; j <- basic; k <- basic) yield (i,(j,k))) ++ + (for (i <- basic; j <- basic; k <- basic; l <- basic) yield (i,j,k,l)) ++ + (for (i <- basic; j <- basic; k <- basic; l <- basic) yield ((i,j),(k,l))) ++ + (for (i <- basic; j <- basic; k <- basic; l <- basic) yield (i,(j,k,l))) ++ + (for (i <- basic; j <- basic; k <- basic; l <- basic; m <- basic) yield (i,j,k,l,m)) ++ + (for (i <- basic; j <- basic; k <- basic; l <- basic; m <- basic) yield (i,(j,(k,(l,m))))) + } + def hashNestedTuples(ts: Array[Product with Serializable], hash: (Product with Serializable) => Unit) { + var i = 0 + while (i < ts.length) { + hash(ts(i)) + i += 1 + } + } + + def findSpeed[A](n: Int, h: (Array[A],A=>Unit)=>Unit, aa: Array[A], f: A=>Unit) = { + (time { for (i <- 1 to n) { h(aa,f) } }._2, aa.length.toLong*n) + } + + def reportSpeed[A](repeats: Int, xs: List[(String, ()=>(Long,Long))]) { + val tn = Array.fill(xs.length)((0L,0L)) + for (j <- 1 to repeats) { + for ((l,i) <- xs zipWithIndex) { + val x = l._2() + tn(i) = (tn(i)._1 + x._1, tn(i)._2 + x._2) + } + } + for (((t,n),(title,_)) <- (tn zip xs)) { + val rate = (n*1e-6)/(t*1e-9) + printf("Hash rate for %s: %4.2f million/second\n",title,rate) + } + } + + def main(args: Array[String]) { + val bl = makeBinaryLists + val is = makeIntSets + val nt = makeNestedTuples + // Uncomment the following for string stats if MurmurHash3 available + val cs = makeCharStrings + report("Java String hash for strings",{ hashCharStrings(cs,defaultHashString); HashHist.resultAndReset }) + report("MurmurHash3 for strings",{ hashCharStrings(cs,murmurHashString); HashHist.resultAndReset }) + HashHist.enabled = false + reportSpeed(3, List( + ("Java string hash", () => findSpeed[String](30, (x, y) => hashCharStrings(x, y),cs,defaultHashString)), + ("MurmurHash3 string hash", () => findSpeed[String](30,(x, y) => hashCharStrings(x, y),cs,murmurHashString)) + )) + // reportSpeed("Java string hash",30,hashCharStrings.tupled,cs,defaultHashString) + // reportSpeed("MurmurHash3 string hash",30,hashCharStrings.tupled,cs,murmurHashString) + HashHist.enabled = true + report("lists of binary int lists",{ hashBinaryLists(bl,defaultHashList); HashHist.resultAndReset }) + report("small integer sets",{ hashIntSets(is,defaultHashSets); HashHist.resultAndReset }) + report("small nested tuples",{ hashNestedTuples(nt,defaultHashTuples); HashHist.resultAndReset }) + HashHist.enabled = false + reportSpeed(3,List( + ("lists of lists of binary ints", () => findSpeed(20,hashBinaryLists,bl,defaultHashList)), + ("small integer sets", () => findSpeed(10,hashIntSets,is,defaultHashSets)), + ("small nested tuples", () => findSpeed(5,hashNestedTuples,nt,defaultHashTuples)) + )) + } +} diff --git a/test/files/run/hashCodeDistribution.flags b/test/files/run/hashCodeDistribution.flags deleted file mode 100644 index 5f7bd1a5be..0000000000 --- a/test/files/run/hashCodeDistribution.flags +++ /dev/null @@ -1 +0,0 @@ --Ymurmur
\ No newline at end of file |