summaryrefslogtreecommitdiff
path: root/src/library/scala
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2011-11-24 01:24:28 +0000
committerPaul Phillips <paulp@improving.org>2011-11-24 01:24:28 +0000
commit60fb9ec19b50cd8059122ccffd72014b3eefc51f (patch)
tree47f47b7b9ad13fb79df825e4e42864a9f53d7035 /src/library/scala
parent4cfca8a7f6763fbbaab37a3473d74118b3ec52bc (diff)
downloadscala-60fb9ec19b50cd8059122ccffd72014b3eefc51f.tar.gz
scala-60fb9ec19b50cd8059122ccffd72014b3eefc51f.tar.bz2
scala-60fb9ec19b50cd8059122ccffd72014b3eefc51f.zip
Refinements of "def seq" and murmurhash.
Trying to make hashcodes faster. Didn't achieve much on that front, so redirected into structural/consistency issues. The latter was lacking in terms of how/where "def seq" was defined. The documentation I can find doesn't give me much hint that the sequential form of my sequential collection might be a single-use iterator! (As in StringOps, ArrayOps.) If that's intentional it should be in huge letters. I'm assuming for now that it wasn't. Also, there was this: GenMapLike: def seq: Map[A, B] GenSetLike: def seq: Set[A] GenSeqLike: // nothing, returns Traversable So I added some def seqs where I needed the more specific types for my hashcode work. Hashcodewise, I broke the MurmurHash3 object into a reusable class and a collections-specific object, and I deprecated the methods which took GenTraversableOnce in favor of ones taking TraversableOnce, because there's no reason the hashcode library should have to know about things like "make sure to call seq before you traverse or you'll be sorry." Exclude things by their type and you can never make a mistake. End transmission.
Diffstat (limited to 'src/library/scala')
-rw-r--r--src/library/scala/collection/GenMapLike.scala3
-rw-r--r--src/library/scala/collection/GenSeqLike.scala12
-rw-r--r--src/library/scala/collection/GenSetLike.scala3
-rw-r--r--src/library/scala/collection/IndexedSeq.scala1
-rw-r--r--src/library/scala/collection/IndexedSeqLike.scala6
-rw-r--r--src/library/scala/collection/LinearSeq.scala1
-rw-r--r--src/library/scala/collection/LinearSeqLike.scala7
-rw-r--r--src/library/scala/collection/Map.scala3
-rw-r--r--src/library/scala/collection/Seq.scala5
-rw-r--r--src/library/scala/collection/Set.scala2
-rw-r--r--src/library/scala/collection/immutable/IndexedSeq.scala1
-rw-r--r--src/library/scala/collection/immutable/LinearSeq.scala1
-rw-r--r--src/library/scala/collection/immutable/Set.scala2
-rw-r--r--src/library/scala/collection/immutable/StringOps.scala2
-rw-r--r--src/library/scala/collection/mutable/ArrayOps.scala2
-rw-r--r--src/library/scala/collection/mutable/IndexedSeq.scala1
-rw-r--r--src/library/scala/collection/mutable/LinearSeq.scala1
-rw-r--r--src/library/scala/util/MurmurHash3.scala105
-rw-r--r--src/library/scala/xml/Utility.scala2
19 files changed, 103 insertions, 57 deletions
diff --git a/src/library/scala/collection/GenMapLike.scala b/src/library/scala/collection/GenMapLike.scala
index f4a3903c13..12ecbcf140 100644
--- a/src/library/scala/collection/GenMapLike.scala
+++ b/src/library/scala/collection/GenMapLike.scala
@@ -29,10 +29,9 @@ trait GenMapLike[A, +B, +Repr] extends GenIterableLike[(A, B), Repr] with Equals
def +[B1 >: B](kv: (A, B1)): GenMap[A, B1]
def - (key: A): Repr
-
// This hash code must be symmetric in the contents but ought not
// collide trivially.
- override def hashCode() = util.MurmurHash3.symmetricHash(seq, Map.hashSeed)
+ override def hashCode() = util.MurmurHash3.mapHash(seq)
/** Returns the value associated with a key, or a default value if the key is not contained in the map.
* @param key the key.
diff --git a/src/library/scala/collection/GenSeqLike.scala b/src/library/scala/collection/GenSeqLike.scala
index 2d50cb18b3..b3dd4764a9 100644
--- a/src/library/scala/collection/GenSeqLike.scala
+++ b/src/library/scala/collection/GenSeqLike.scala
@@ -31,6 +31,7 @@ import annotation.bridge
* Unlike iterables, sequences always have a defined order of elements.
*/
trait GenSeqLike[+A, +Repr] extends GenIterableLike[A, Repr] with Equals with Parallelizable[A, parallel.ParSeq[A]] {
+ def seq: Seq[A]
/** Selects an element by its index in the $coll.
*
@@ -439,16 +440,7 @@ trait GenSeqLike[+A, +Repr] extends GenIterableLike[A, Repr] with Equals with Pa
/** Hashcodes for $Coll produce a value from the hashcodes of all the
* elements of the $coll.
*/
- override def hashCode() = {
- import util.MurmurHash3._
- var n = 0
- var h = Seq.hashSeed
- seq foreach {
- x => h = mix(h, x.##)
- n += 1
- }
- finalizeHash(h, n)
- }
+ override def hashCode() = util.MurmurHash3.seqHash(seq)
/** The equals method for arbitrary sequences. Compares this sequence to
* some other object.
diff --git a/src/library/scala/collection/GenSetLike.scala b/src/library/scala/collection/GenSetLike.scala
index adbb043ecd..f729f82bb4 100644
--- a/src/library/scala/collection/GenSetLike.scala
+++ b/src/library/scala/collection/GenSetLike.scala
@@ -143,6 +143,5 @@ extends GenIterableLike[A, Repr]
// Calling map on a set drops duplicates: any hashcode collisions would
// then be dropped before they can be added.
// Hash should be symmetric in set entries, but without trivial collisions.
- override def hashCode() = util.MurmurHash3.symmetricHash(seq, Set.hashSeed)
-
+ override def hashCode() = util.MurmurHash3.setHash(seq)
}
diff --git a/src/library/scala/collection/IndexedSeq.scala b/src/library/scala/collection/IndexedSeq.scala
index 13c48ec144..4a3586a375 100644
--- a/src/library/scala/collection/IndexedSeq.scala
+++ b/src/library/scala/collection/IndexedSeq.scala
@@ -20,6 +20,7 @@ trait IndexedSeq[+A] extends Seq[A]
with GenericTraversableTemplate[A, IndexedSeq]
with IndexedSeqLike[A, IndexedSeq[A]] {
override def companion: GenericCompanion[IndexedSeq] = IndexedSeq
+ override def seq: IndexedSeq[A] = this
}
/** $factoryInfo
diff --git a/src/library/scala/collection/IndexedSeqLike.scala b/src/library/scala/collection/IndexedSeqLike.scala
index b743174260..baf5606ab5 100644
--- a/src/library/scala/collection/IndexedSeqLike.scala
+++ b/src/library/scala/collection/IndexedSeqLike.scala
@@ -6,8 +6,6 @@
** |/ **
\* */
-
-
package scala.collection
import generic._
@@ -42,6 +40,9 @@ import scala.annotation.tailrec
trait IndexedSeqLike[+A, +Repr] extends SeqLike[A, Repr] {
self =>
+ def seq: IndexedSeq[A]
+ override def hashCode() = util.MurmurHash3.seqHash(seq) // TODO - can we get faster via "indexedSeqHash" ?
+
override protected[this] def thisCollection: IndexedSeq[A] = this.asInstanceOf[IndexedSeq[A]]
override protected[this] def toCollection(repr: Repr): IndexedSeq[A] = repr.asInstanceOf[IndexedSeq[A]]
@@ -96,4 +97,3 @@ trait IndexedSeqLike[+A, +Repr] extends SeqLike[A, Repr] {
result
}
}
-
diff --git a/src/library/scala/collection/LinearSeq.scala b/src/library/scala/collection/LinearSeq.scala
index dfa0a0ed68..be143cf96b 100644
--- a/src/library/scala/collection/LinearSeq.scala
+++ b/src/library/scala/collection/LinearSeq.scala
@@ -20,6 +20,7 @@ trait LinearSeq[+A] extends Seq[A]
with GenericTraversableTemplate[A, LinearSeq]
with LinearSeqLike[A, LinearSeq[A]] {
override def companion: GenericCompanion[LinearSeq] = LinearSeq
+ override def seq: LinearSeq[A] = this
}
/** $factoryInfo
diff --git a/src/library/scala/collection/LinearSeqLike.scala b/src/library/scala/collection/LinearSeqLike.scala
index 1b3fa9b26e..75c1edac66 100644
--- a/src/library/scala/collection/LinearSeqLike.scala
+++ b/src/library/scala/collection/LinearSeqLike.scala
@@ -41,11 +41,16 @@ import scala.util.control.Breaks._
* @tparam A the element type of the $coll
* @tparam Repr the type of the actual $coll containing the elements.
*/
-trait LinearSeqLike[+A, +Repr <: LinearSeqLike[A, Repr]] extends SeqLike[A, Repr] { self: Repr =>
+trait LinearSeqLike[+A, +Repr <: LinearSeqLike[A, Repr]] extends SeqLike[A, Repr] {
+ self: Repr =>
override protected[this] def thisCollection: LinearSeq[A] = this.asInstanceOf[LinearSeq[A]]
override protected[this] def toCollection(repr: Repr): LinearSeq[A] = repr.asInstanceOf[LinearSeq[A]]
+ def seq: LinearSeq[A]
+
+ override def hashCode() = util.MurmurHash3.seqHash(seq) // TODO - can we get faster via "linearSeqHash" ?
+
override /*IterableLike*/
def iterator: Iterator[A] = new AbstractIterator[A] {
var these = self
diff --git a/src/library/scala/collection/Map.scala b/src/library/scala/collection/Map.scala
index a72bb13b5d..0c07d5bb74 100644
--- a/src/library/scala/collection/Map.scala
+++ b/src/library/scala/collection/Map.scala
@@ -37,9 +37,6 @@ trait Map[A, +B] extends Iterable[(A, B)] with GenMap[A, B] with MapLike[A, B, M
* @define coll map
*/
object Map extends MapFactory[Map] {
-
- private[collection] val hashSeed = "Map".hashCode
-
def empty[A, B]: immutable.Map[A, B] = immutable.Map.empty
/** $mapCanBuildFromInfo */
diff --git a/src/library/scala/collection/Seq.scala b/src/library/scala/collection/Seq.scala
index cb3fe5ca71..fd03a49af4 100644
--- a/src/library/scala/collection/Seq.scala
+++ b/src/library/scala/collection/Seq.scala
@@ -6,8 +6,6 @@
** |/ **
\* */
-
-
package scala.collection
import generic._
@@ -32,9 +30,6 @@ trait Seq[+A] extends PartialFunction[Int, A]
* @define Coll Seq
*/
object Seq extends SeqFactory[Seq] {
-
- private[collection] val hashSeed = "Seq".hashCode
-
/** $genericCanBuildFromInfo */
implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, Seq[A]] = ReusableCBF.asInstanceOf[GenericCanBuildFrom[A]]
diff --git a/src/library/scala/collection/Set.scala b/src/library/scala/collection/Set.scala
index bdfbcd9c74..4c67aad603 100644
--- a/src/library/scala/collection/Set.scala
+++ b/src/library/scala/collection/Set.scala
@@ -38,8 +38,6 @@ trait Set[A] extends (A => Boolean)
* @define Coll Set
*/
object Set extends SetFactory[Set] {
- private[collection] val hashSeed = "Set".hashCode
-
def newBuilder[A] = immutable.Set.newBuilder[A]
override def empty[A]: Set[A] = immutable.Set.empty[A]
implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, Set[A]] = setCanBuildFrom[A]
diff --git a/src/library/scala/collection/immutable/IndexedSeq.scala b/src/library/scala/collection/immutable/IndexedSeq.scala
index 35b93860a3..bbbef158af 100644
--- a/src/library/scala/collection/immutable/IndexedSeq.scala
+++ b/src/library/scala/collection/immutable/IndexedSeq.scala
@@ -23,6 +23,7 @@ trait IndexedSeq[+A] extends Seq[A]
with IndexedSeqLike[A, IndexedSeq[A]] {
override def companion: GenericCompanion[IndexedSeq] = IndexedSeq
override def toIndexedSeq[B >: A]: IndexedSeq[B] = this
+ override def seq: IndexedSeq[A] = this
}
/** $factoryInfo
diff --git a/src/library/scala/collection/immutable/LinearSeq.scala b/src/library/scala/collection/immutable/LinearSeq.scala
index e2e2d97588..536894c287 100644
--- a/src/library/scala/collection/immutable/LinearSeq.scala
+++ b/src/library/scala/collection/immutable/LinearSeq.scala
@@ -23,6 +23,7 @@ trait LinearSeq[+A] extends Seq[A]
with GenericTraversableTemplate[A, LinearSeq]
with LinearSeqLike[A, LinearSeq[A]] {
override def companion: GenericCompanion[LinearSeq] = LinearSeq
+ override def seq: LinearSeq[A] = this
}
/** $factoryInfo
diff --git a/src/library/scala/collection/immutable/Set.scala b/src/library/scala/collection/immutable/Set.scala
index fe3ce8fd0b..cd972d6c30 100644
--- a/src/library/scala/collection/immutable/Set.scala
+++ b/src/library/scala/collection/immutable/Set.scala
@@ -46,8 +46,6 @@ object Set extends ImmutableSetFactory[Set] {
implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, Set[A]] = setCanBuildFrom[A]
override def empty[A]: Set[A] = EmptySet.asInstanceOf[Set[A]]
- private val hashSeed = "Set".hashCode
-
/** An optimized representation for immutable empty sets */
private object EmptySet extends AbstractSet[Any] with Set[Any] with Serializable {
override def size: Int = 0
diff --git a/src/library/scala/collection/immutable/StringOps.scala b/src/library/scala/collection/immutable/StringOps.scala
index 5fc71c7259..8612357db9 100644
--- a/src/library/scala/collection/immutable/StringOps.scala
+++ b/src/library/scala/collection/immutable/StringOps.scala
@@ -48,5 +48,5 @@ final class StringOps(override val repr: String) extends StringLike[String] {
override def toString = repr
override def length = repr.length
- def seq = this.iterator
+ def seq = new WrappedString(repr)
}
diff --git a/src/library/scala/collection/mutable/ArrayOps.scala b/src/library/scala/collection/mutable/ArrayOps.scala
index 008e7fab9a..5f7c508181 100644
--- a/src/library/scala/collection/mutable/ArrayOps.scala
+++ b/src/library/scala/collection/mutable/ArrayOps.scala
@@ -93,7 +93,7 @@ abstract class ArrayOps[T] extends ArrayLike[T, Array[T]] with CustomParalleliza
bb.result
}
- def seq = this.iterator
+ def seq = thisCollection
}
diff --git a/src/library/scala/collection/mutable/IndexedSeq.scala b/src/library/scala/collection/mutable/IndexedSeq.scala
index e55cf0429f..0e2e06df84 100644
--- a/src/library/scala/collection/mutable/IndexedSeq.scala
+++ b/src/library/scala/collection/mutable/IndexedSeq.scala
@@ -23,6 +23,7 @@ trait IndexedSeq[A] extends Seq[A]
with GenericTraversableTemplate[A, IndexedSeq]
with IndexedSeqLike[A, IndexedSeq[A]] {
override def companion: GenericCompanion[IndexedSeq] = IndexedSeq
+ override def seq: IndexedSeq[A] = this
}
/** $factoryInfo
diff --git a/src/library/scala/collection/mutable/LinearSeq.scala b/src/library/scala/collection/mutable/LinearSeq.scala
index 0994c9e885..e29d6bf3e6 100644
--- a/src/library/scala/collection/mutable/LinearSeq.scala
+++ b/src/library/scala/collection/mutable/LinearSeq.scala
@@ -25,6 +25,7 @@ trait LinearSeq[A] extends Seq[A]
with GenericTraversableTemplate[A, LinearSeq]
with LinearSeqLike[A, LinearSeq[A]] {
override def companion: GenericCompanion[LinearSeq] = LinearSeq
+ override def seq: LinearSeq[A] = this
}
/** $factoryInfo
diff --git a/src/library/scala/util/MurmurHash3.scala b/src/library/scala/util/MurmurHash3.scala
index 0fd8b2e123..33d9d2f0e5 100644
--- a/src/library/scala/util/MurmurHash3.scala
+++ b/src/library/scala/util/MurmurHash3.scala
@@ -2,7 +2,6 @@ package scala.util
import java.lang.Integer.{ rotateLeft => rotl }
-
/**
* An implementation of Austin Appleby's MurmurHash 3 algorithm
* (MurmurHash3_x86_32).
@@ -22,15 +21,7 @@ import java.lang.Integer.{ rotateLeft => rotl }
*
* @see http://code.google.com/p/smhasher
*/
-object MurmurHash3 {
-
- // Some arbitrary values used as hash seeds
- final val arraySeed = 0x3c074a61
- final val stringSeed = 0xf7ca7fd2
- final val productSeed = 0xcafebabe
- final val symmetricSeed = 0xb592f7ae
- final val traversableSeed = 0xe73a8b15
-
+class MurmurHash3 {
/** Mix in a block of data into an intermediate hash value. */
final def mix(hash: Int, data: Int): Int = {
var h = mixLast(hash, data)
@@ -68,7 +59,7 @@ object MurmurHash3 {
}
/** Compute the hash of a product */
- final def productHash(x: Product, seed: Int = productSeed): Int = {
+ final def productHash(x: Product, seed: Int): Int = {
val arr = x.productArity
// Case objects have the hashCode inlined directly into the
// synthetic hashCode method, but this method should still give
@@ -88,7 +79,7 @@ object MurmurHash3 {
}
/** Compute the hash of a string */
- final def stringHash(str: String, seed: Int = stringSeed): Int = {
+ final def stringHash(str: String, seed: Int): Int = {
var h = seed
var i = 0
while (i + 1 < str.length) {
@@ -102,38 +93,39 @@ object MurmurHash3 {
/** Compute a hash that is symmetric in its arguments - that is a hash
* where the order of appearance of elements does not matter.
- * This is useful for hashing sets, for example. */
- final def symmetricHash[T](xs: collection.GenTraversableOnce[T], seed: Int = symmetricSeed): Int = {
+ * This is useful for hashing sets, for example.
+ */
+ final def unorderedHash(xs: TraversableOnce[Any], seed: Int): Int = {
var a, b, n = 0
var c = 1
- xs.seq.foreach { x =>
+ xs foreach { x =>
val h = x.##
a += h
b ^= h
if (h != 0) c *= h
n += 1
}
-
var h = seed
h = mix(h, a)
h = mix(h, b)
h = mixLast(h, c)
finalizeHash(h, n)
}
-
- /** Compute a hash for a traversable (once). */
- final def traversableHash[T](xs: collection.GenTraversableOnce[T], seed: Int = traversableSeed): Int = {
+ /** Compute a hash that depends on the order of its arguments.
+ */
+ final def orderedHash(xs: TraversableOnce[Any], seed: Int): Int = {
var n = 0
var h = seed
- xs.seq.foreach { x =>
+ xs foreach { x =>
h = mix(h, x.##)
n += 1
}
finalizeHash(h, n)
}
- /** Compute the hash of an array */
- final def arrayHash[@specialized T](a: Array[T], seed: Int = arraySeed): Int = {
+ /** Compute the hash of an array.
+ */
+ final def arrayHash[@specialized T](a: Array[T], seed: Int): Int = {
var h = seed
var i = 0
while (i < a.length) {
@@ -144,8 +136,9 @@ object MurmurHash3 {
}
/** Compute the hash of a byte array. Faster than arrayHash, because
- * it hashes 4 bytes at once. */
- final def bytesHash(data: Array[Byte], seed: Int = arraySeed): Int = {
+ * it hashes 4 bytes at once.
+ */
+ final def bytesHash(data: Array[Byte], seed: Int): Int = {
var len = data.length
var h = seed
@@ -176,3 +169,67 @@ object MurmurHash3 {
finalizeHash(h, data.length)
}
}
+
+/**
+ * An instance of MurmurHash3 with predefined seeds for various
+ * classes. Used by all the scala collections and case classes.
+ */
+object MurmurHash3 extends MurmurHash3 {
+ final val arraySeed = 0x3c074a61
+ final val stringSeed = 0xf7ca7fd2
+ final val productSeed = 0xcafebabe
+ final val symmetricSeed = 0xb592f7ae
+ final val traversableSeed = 0xe73a8b15
+ final val seqSeed = "Seq".hashCode
+ final val mapSeed = "Map".hashCode
+ final val setSeed = "Set".hashCode
+
+ def arrayHash[@specialized T](a: Array[T]): Int = arrayHash(a, arraySeed)
+ def bytesHash(data: Array[Byte]): Int = bytesHash(data, arraySeed)
+ def orderedHash(xs: TraversableOnce[Any]): Int = orderedHash(xs, symmetricSeed)
+ def productHash(x: Product): Int = productHash(x, productSeed)
+ def stringHash(x: String): Int = stringHash(x, stringSeed)
+ def unorderedHash(xs: TraversableOnce[Any]): Int = unorderedHash(xs, traversableSeed)
+
+ /** To offer some potential for optimization.
+ */
+ def seqHash(xs: collection.Seq[_]): Int = orderedHash(xs, seqSeed)
+ def mapHash(xs: collection.Map[_, _]): Int = unorderedHash(xs, mapSeed)
+ def setHash(xs: collection.Set[_]): Int = unorderedHash(xs, setSeed)
+
+ /** All this trouble and foreach still appears faster.
+ * Leaving in place in case someone would like to investigate further.
+ */
+ /**
+ def linearSeqHash(xs: collection.LinearSeq[_], seed: Int): Int = {
+ var n = 0
+ var h = seed
+ var elems = xs
+ while (elems.nonEmpty) {
+ h = mix(h, elems.head.##)
+ n += 1
+ elems = elems.tail
+ }
+ finalizeHash(h, n)
+ }
+
+ def indexedSeqHash(xs: collection.IndexedSeq[_], seed: Int): Int = {
+ var n = 0
+ var h = seed
+ val len = xs.length
+ while (n < len) {
+ h = mix(h, xs(n).##)
+ n += 1
+ }
+ finalizeHash(h, n)
+ }
+ */
+
+ @deprecated("Use unorderedHash", "2.10.0")
+ final def symmetricHash[T](xs: collection.GenTraversableOnce[T], seed: Int = symmetricSeed): Int =
+ unorderedHash(xs.seq, seed)
+
+ @deprecated("Use orderedHash", "2.10.0")
+ final def traversableHash[T](xs: collection.GenTraversableOnce[T], seed: Int = traversableSeed): Int =
+ orderedHash(xs.seq, seed)
+}
diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala
index 9f230a7866..9b48f4e1bb 100644
--- a/src/library/scala/xml/Utility.scala
+++ b/src/library/scala/xml/Utility.scala
@@ -259,7 +259,7 @@ object Utility extends AnyRef with parsing.TokenTests {
* @param children
*/
def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) =
- scala.util.MurmurHash3.traversableHash(label +: attribHashCode +: scpeHash +: children, pre.##)
+ scala.util.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##)
def appendQuoted(s: String): String = sbToString(appendQuoted(s, _))