diff options
author | Josh Suereth <Joshua.Suereth@gmail.com> | 2012-06-07 12:46:07 -0700 |
---|---|---|
committer | Josh Suereth <Joshua.Suereth@gmail.com> | 2012-06-07 12:46:07 -0700 |
commit | d6a57968921b3c158d2414f8913be89d98beb541 (patch) | |
tree | 10c37868453bfdc24d6a699e094aa9c6f0b61a6e /src | |
parent | 961092b95659f9127f8a077d0060c701b4714240 (diff) | |
parent | 881641f83461b5fc23ab25d1efa08b7a760a3363 (diff) | |
download | scala-d6a57968921b3c158d2414f8913be89d98beb541.tar.gz scala-d6a57968921b3c158d2414f8913be89d98beb541.tar.bz2 scala-d6a57968921b3c158d2414f8913be89d98beb541.zip |
Merge pull request #677 from axel22/feature/util-hashing2
Add the first iteration of the `util.hashing` package.
Diffstat (limited to 'src')
-rw-r--r-- | src/library/scala/collection/GenMapLike.scala | 2 | ||||
-rw-r--r-- | src/library/scala/collection/GenSeqLike.scala | 2 | ||||
-rw-r--r-- | src/library/scala/collection/GenSetLike.scala | 2 | ||||
-rw-r--r-- | src/library/scala/collection/IndexedSeqLike.scala | 2 | ||||
-rw-r--r-- | src/library/scala/collection/LinearSeqLike.scala | 2 | ||||
-rw-r--r-- | src/library/scala/collection/concurrent/TrieMap.scala | 8 | ||||
-rw-r--r-- | src/library/scala/collection/convert/Wrappers.scala | 9 | ||||
-rw-r--r-- | src/library/scala/collection/mutable/FlatHashTable.scala | 4 | ||||
-rw-r--r-- | src/library/scala/collection/mutable/HashTable.scala | 7 | ||||
-rw-r--r-- | src/library/scala/runtime/ScalaRunTime.scala | 2 | ||||
-rw-r--r-- | src/library/scala/util/hashing/ByteswapHashing.scala | 35 | ||||
-rw-r--r-- | src/library/scala/util/hashing/Hashing.scala | 6 | ||||
-rw-r--r-- | src/library/scala/util/hashing/MurmurHash3.scala (renamed from src/library/scala/util/MurmurHash3.scala) | 70 | ||||
-rw-r--r-- | src/library/scala/util/hashing/package.scala | 35 | ||||
-rwxr-xr-x | src/library/scala/xml/Utility.scala | 2 |
15 files changed, 133 insertions, 55 deletions
diff --git a/src/library/scala/collection/GenMapLike.scala b/src/library/scala/collection/GenMapLike.scala index 4dd2a4fe37..b6c90d4d2a 100644 --- a/src/library/scala/collection/GenMapLike.scala +++ b/src/library/scala/collection/GenMapLike.scala @@ -31,7 +31,7 @@ trait GenMapLike[A, +B, +Repr] extends GenIterableLike[(A, B), Repr] with Equals // This hash code must be symmetric in the contents but ought not // collide trivially. - override def hashCode() = util.MurmurHash3.mapHash(seq) + override def hashCode() = util.hashing.MurmurHash3.mapHash(seq) /** Returns the value associated with a key, or a default value if the key is not contained in the map. * @param key the key. diff --git a/src/library/scala/collection/GenSeqLike.scala b/src/library/scala/collection/GenSeqLike.scala index cfa0ca101e..a77cb05960 100644 --- a/src/library/scala/collection/GenSeqLike.scala +++ b/src/library/scala/collection/GenSeqLike.scala @@ -465,7 +465,7 @@ trait GenSeqLike[+A, +Repr] extends Any with GenIterableLike[A, Repr] with Equal /** Hashcodes for $Coll produce a value from the hashcodes of all the * elements of the $coll. */ - override def hashCode() = util.MurmurHash3.seqHash(seq) + override def hashCode() = util.hashing.MurmurHash3.seqHash(seq) /** The equals method for arbitrary sequences. Compares this sequence to * some other object. diff --git a/src/library/scala/collection/GenSetLike.scala b/src/library/scala/collection/GenSetLike.scala index 219374abc6..18eb31da03 100644 --- a/src/library/scala/collection/GenSetLike.scala +++ b/src/library/scala/collection/GenSetLike.scala @@ -127,5 +127,5 @@ extends GenIterableLike[A, Repr] // Calling map on a set drops duplicates: any hashcode collisions would // then be dropped before they can be added. // Hash should be symmetric in set entries, but without trivial collisions. - override def hashCode() = util.MurmurHash3.setHash(seq) + override def hashCode() = util.hashing.MurmurHash3.setHash(seq) } diff --git a/src/library/scala/collection/IndexedSeqLike.scala b/src/library/scala/collection/IndexedSeqLike.scala index 11f481e425..f79a9d2c66 100644 --- a/src/library/scala/collection/IndexedSeqLike.scala +++ b/src/library/scala/collection/IndexedSeqLike.scala @@ -41,7 +41,7 @@ trait IndexedSeqLike[+A, +Repr] extends Any with SeqLike[A, Repr] { self => def seq: IndexedSeq[A] - override def hashCode() = util.MurmurHash3.seqHash(seq) // TODO - can we get faster via "indexedSeqHash" ? + override def hashCode() = util.hashing.MurmurHash3.seqHash(seq) // TODO - can we get faster via "indexedSeqHash" ? override protected[this] def thisCollection: IndexedSeq[A] = this.asInstanceOf[IndexedSeq[A]] override protected[this] def toCollection(repr: Repr): IndexedSeq[A] = repr.asInstanceOf[IndexedSeq[A]] diff --git a/src/library/scala/collection/LinearSeqLike.scala b/src/library/scala/collection/LinearSeqLike.scala index ceb980ff80..bfe27ef94a 100644 --- a/src/library/scala/collection/LinearSeqLike.scala +++ b/src/library/scala/collection/LinearSeqLike.scala @@ -50,7 +50,7 @@ trait LinearSeqLike[+A, +Repr <: LinearSeqLike[A, Repr]] extends SeqLike[A, Repr def seq: LinearSeq[A] - override def hashCode() = util.MurmurHash3.seqHash(seq) // TODO - can we get faster via "linearSeqHash" ? + override def hashCode() = util.hashing.MurmurHash3.seqHash(seq) // TODO - can we get faster via "linearSeqHash" ? override /*IterableLike*/ def iterator: Iterator[A] = new AbstractIterator[A] { diff --git a/src/library/scala/collection/concurrent/TrieMap.scala b/src/library/scala/collection/concurrent/TrieMap.scala index f944d20bdb..08e9125bd8 100644 --- a/src/library/scala/collection/concurrent/TrieMap.scala +++ b/src/library/scala/collection/concurrent/TrieMap.scala @@ -825,7 +825,7 @@ extends scala.collection.concurrent.Map[K, V] } @inline - def computeHash(k: K) = hashingobj.hashCode(k) + def computeHash(k: K) = hashingobj.hash(k) final def lookup(k: K): V = { val hc = computeHash(k) @@ -915,11 +915,7 @@ object TrieMap extends MutableMapFactory[TrieMap] { def empty[K, V]: TrieMap[K, V] = new TrieMap[K, V] class MangledHashing[K] extends Hashing[K] { - def hashCode(k: K) = { - var hcode = k.## * 0x9e3775cd - hcode = java.lang.Integer.reverseBytes(hcode) - hcode * 0x9e3775cd - } + def hash(k: K) = util.hashing.byteswap32(k.##) } } diff --git a/src/library/scala/collection/convert/Wrappers.scala b/src/library/scala/collection/convert/Wrappers.scala index 6c66ac6c4d..8c603dc91b 100644 --- a/src/library/scala/collection/convert/Wrappers.scala +++ b/src/library/scala/collection/convert/Wrappers.scala @@ -172,20 +172,15 @@ private[collection] trait Wrappers { def hasNext = ui.hasNext - def improve(hc: Int) = { - var i = hc * 0x9e3775cd - i = java.lang.Integer.reverseBytes(i) - i * 0x9e3775c - } - def next() = { val (k, v) = ui.next prev = Some(k) new ju.Map.Entry[A, B] { + import util.hashing.byteswap32 def getKey = k def getValue = v def setValue(v1 : B) = self.put(k, v1) - override def hashCode = improve(k.hashCode) + (improve(v.hashCode) << 16) + override def hashCode = byteswap32(k.hashCode) + (byteswap32(v.hashCode) << 16) override def equals(other: Any) = other match { case e: ju.Map.Entry[_, _] => k == e.getKey && v == e.getValue case _ => false diff --git a/src/library/scala/collection/mutable/FlatHashTable.scala b/src/library/scala/collection/mutable/FlatHashTable.scala index 4070174902..f6d4cc31b6 100644 --- a/src/library/scala/collection/mutable/FlatHashTable.scala +++ b/src/library/scala/collection/mutable/FlatHashTable.scala @@ -397,9 +397,7 @@ private[collection] object FlatHashTable { //h = h + (h << 4) //h ^ (h >>> 10) - var i = hcode * 0x9e3775cd - i = java.lang.Integer.reverseBytes(i) - val improved = i * 0x9e3775cd + val improved = util.hashing.byteswap32(hcode) // for the remainder, see SI-5293 // to ensure that different bits are used for different hash tables, we have to rotate based on the seed diff --git a/src/library/scala/collection/mutable/HashTable.scala b/src/library/scala/collection/mutable/HashTable.scala index c307e6dcab..97e794f06e 100644 --- a/src/library/scala/collection/mutable/HashTable.scala +++ b/src/library/scala/collection/mutable/HashTable.scala @@ -401,12 +401,7 @@ private[collection] object HashTable { * * For performance reasons, we avoid this improvement. * */ - var i = hcode * 0x9e3775cd - i = java.lang.Integer.reverseBytes(i) - i = i * 0x9e3775cd - // a slower alternative for byte reversal: - // i = (i << 16) | (i >> 16) - // i = ((i >> 8) & 0x00ff00ff) | ((i << 8) & 0xff00ff00) + val i = util.hashing.byteswap32(hcode) /* Jenkins hash * for range 0-10000, output has the msb set to zero */ diff --git a/src/library/scala/runtime/ScalaRunTime.scala b/src/library/scala/runtime/ScalaRunTime.scala index 4c5e0e408b..5cd301a0fe 100644 --- a/src/library/scala/runtime/ScalaRunTime.scala +++ b/src/library/scala/runtime/ScalaRunTime.scala @@ -199,7 +199,7 @@ object ScalaRunTime { def _toString(x: Product): String = x.productIterator.mkString(x.productPrefix + "(", ",", ")") - def _hashCode(x: Product): Int = scala.util.MurmurHash3.productHash(x) + def _hashCode(x: Product): Int = scala.util.hashing.MurmurHash3.productHash(x) /** A helper for case classes. */ def typedProductIterator[T](x: Product): Iterator[T] = { diff --git a/src/library/scala/util/hashing/ByteswapHashing.scala b/src/library/scala/util/hashing/ByteswapHashing.scala new file mode 100644 index 0000000000..fc8a33a486 --- /dev/null +++ b/src/library/scala/util/hashing/ByteswapHashing.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2011, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.util.hashing + + + + + + +/** A fast multiplicative hash by Phil Bagwell. + */ +final class ByteswapHashing[T] extends Hashing[T] { + + def hash(v: T) = byteswap32(v.##) + +} + + +object ByteswapHashing { + + private class Chained[T](h: Hashing[T]) extends Hashing[T] { + def hash(v: T) = byteswap32(h.hash(v)) + } + + /** Composes another `Hashing` with the Byteswap hash. + */ + def chain[T](h: Hashing[T]): Hashing[T] = new Chained(h) + +} diff --git a/src/library/scala/util/hashing/Hashing.scala b/src/library/scala/util/hashing/Hashing.scala index f27a825125..84b549f35e 100644 --- a/src/library/scala/util/hashing/Hashing.scala +++ b/src/library/scala/util/hashing/Hashing.scala @@ -22,7 +22,7 @@ package scala.util.hashing @annotation.implicitNotFound(msg = "No implicit Hashing defined for ${T}.") trait Hashing[T] extends Serializable { - def hashCode(x: T): Int + def hash(x: T): Int } @@ -30,13 +30,13 @@ trait Hashing[T] extends Serializable { object Hashing { final class Default[T] extends Hashing[T] { - def hashCode(x: T) = x.## + def hash(x: T) = x.## } implicit def default[T] = new Default[T] def fromFunction[T](f: T => Int) = new Hashing[T] { - def hashCode(x: T) = f(x) + def hash(x: T) = f(x) } } diff --git a/src/library/scala/util/MurmurHash3.scala b/src/library/scala/util/hashing/MurmurHash3.scala index 9a7f64b4ee..3efd5b5e72 100644 --- a/src/library/scala/util/MurmurHash3.scala +++ b/src/library/scala/util/hashing/MurmurHash3.scala @@ -6,30 +6,11 @@ ** |/ ** \* */ -package scala.util +package scala.util.hashing import java.lang.Integer.{ rotateLeft => rotl } -/** - * An implementation of Austin Appleby's MurmurHash 3 algorithm - * (MurmurHash3_x86_32). - * - * An algorithm designed to generate well-distributed non-cryptographic - * hashes. It is designed to hash data in 32 bit chunks (ints). - * - * The mix method needs to be called at each step to update the intermediate - * hash value. For the last chunk to incorporate into the hash mixLast may - * be used instead, which is slightly faster. Finally finalizeHash needs to - * be called to compute the final hash value. - * - * This is based on the earlier MurmurHash3 code by Rex Kerr, but the - * MurmurHash3 algorithm was since changed by its creator Austin Appleby - * to remedy some weaknesses and improve performance. This represents the - * latest and supposedly final version of the algortihm (revision 136). - * - * @see [[http://code.google.com/p/smhasher]] - */ -class MurmurHash3 { +private[hashing] class MurmurHash3 { /** Mix in a block of data into an intermediate hash value. */ final def mix(hash: Int, data: Int): Int = { var h = mixLast(hash, data) @@ -179,8 +160,25 @@ class MurmurHash3 { } /** - * An instance of MurmurHash3 with predefined seeds for various - * classes. Used by all the scala collections and case classes. + * An implementation of Austin Appleby's MurmurHash 3 algorithm + * (MurmurHash3_x86_32). This object contains methods that hash + * values of various types as well as means to construct `Hashing` + * objects. + * + * This algorithm is designed to generate well-distributed non-cryptographic + * hashes. It is designed to hash data in 32 bit chunks (ints). + * + * The mix method needs to be called at each step to update the intermediate + * hash value. For the last chunk to incorporate into the hash mixLast may + * be used instead, which is slightly faster. Finally finalizeHash needs to + * be called to compute the final hash value. + * + * This is based on the earlier MurmurHash3 code by Rex Kerr, but the + * MurmurHash3 algorithm was since changed by its creator Austin Appleby + * to remedy some weaknesses and improve performance. This represents the + * latest and supposedly final version of the algortihm (revision 136). + * + * @see [[http://code.google.com/p/smhasher]] */ object MurmurHash3 extends MurmurHash3 { final val arraySeed = 0x3c074a61 @@ -205,6 +203,32 @@ object MurmurHash3 extends MurmurHash3 { def mapHash(xs: collection.Map[_, _]): Int = unorderedHash(xs, mapSeed) def setHash(xs: collection.Set[_]): Int = unorderedHash(xs, setSeed) + class ArrayHashing[@specialized T] extends Hashing[Array[T]] { + def hash(a: Array[T]) = arrayHash(a) + } + + def arrayHashing[@specialized T] = new ArrayHashing[T] + + def bytesHashing = new Hashing[Array[Byte]] { + def hash(data: Array[Byte]) = bytesHash(data) + } + + def orderedHashing = new Hashing[TraversableOnce[Any]] { + def hash(xs: TraversableOnce[Any]) = orderedHash(xs) + } + + def productHashing = new Hashing[Product] { + def hash(x: Product) = productHash(x) + } + + def stringHashing = new Hashing[String] { + def hash(x: String) = stringHash(x) + } + + def unorderedHashing = new Hashing[TraversableOnce[Any]] { + def hash(xs: TraversableOnce[Any]) = unorderedHash(xs) + } + /** All this trouble and foreach still appears faster. * Leaving in place in case someone would like to investigate further. */ diff --git a/src/library/scala/util/hashing/package.scala b/src/library/scala/util/hashing/package.scala new file mode 100644 index 0000000000..becfa4911e --- /dev/null +++ b/src/library/scala/util/hashing/package.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2011, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.util + + + + + + +package object hashing { + + /** Fast multiplicative hash with a nice distribution. + */ + def byteswap32(v: Int): Int = { + var hc = v * 0x9e3775cd + hc = java.lang.Integer.reverseBytes(hc) + hc * 0x9e3775cd + } + + /** Fast multiplicative hash with a nice distribution + * for 64-bit values. + */ + def byteswap64(v: Long): Long = { + var hc = v * 0x9e3775cd9e3775cdL + hc = java.lang.Long.reverseBytes(hc) + hc * 0x9e3775cd9e3775cdL + } + +} diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala index 17c91fa52c..bae529c85c 100755 --- a/src/library/scala/xml/Utility.scala +++ b/src/library/scala/xml/Utility.scala @@ -268,7 +268,7 @@ object Utility extends AnyRef with parsing.TokenTests { * Returns a hashcode for the given constituents of a node */ def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = - scala.util.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##) + scala.util.hashing.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##) def appendQuoted(s: String): String = sbToString(appendQuoted(s, _)) |