From b689b912caa0e1bcd981c1a0092857ac83fb1e1d Mon Sep 17 00:00:00 2001 From: Aleksandar Pokopec Date: Wed, 20 Oct 2010 20:20:08 +0000 Subject: Some serious hash tries bugs fixed. Plus one wild goose chase and test fixes. No review. --- src/library/scala/collection/MapLike.scala | 3 ++- .../scala/collection/immutable/HashMap.scala | 30 ++++++++++++++++------ .../scala/collection/immutable/HashSet.scala | 26 +++++++++++++------ .../collection/parallel/ParIterableLike.scala | 6 ++++- .../collection/parallel/immutable/ParHashMap.scala | 27 ++++++++++++++++--- .../collection/parallel/immutable/ParHashSet.scala | 5 ++-- .../scala/collection/parallel/package.scala | 2 +- 7 files changed, 75 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/library/scala/collection/MapLike.scala b/src/library/scala/collection/MapLike.scala index ebf95dde81..ae1d2cc6e8 100644 --- a/src/library/scala/collection/MapLike.scala +++ b/src/library/scala/collection/MapLike.scala @@ -347,7 +347,8 @@ self => try { this forall { case (k, v) => that.get(k.asInstanceOf[b]) match { - case Some(`v`) => true + case Some(`v`) => + true case _ => false } } diff --git a/src/library/scala/collection/immutable/HashMap.scala b/src/library/scala/collection/immutable/HashMap.scala index 684f9bced4..32542b9359 100644 --- a/src/library/scala/collection/immutable/HashMap.scala +++ b/src/library/scala/collection/immutable/HashMap.scala @@ -71,7 +71,7 @@ class HashMap[A, +B] extends Map[A,B] with MapLike[A, B, HashMap[A, B]] with Par h ^ (h >>> 10) } - protected def computeHash(key: A) = improve(elemHashCode(key)) + private[collection] def computeHash(key: A) = improve(elemHashCode(key)) protected type Merger[B1] = ((A, B1), (A, B1)) => (A, B1) @@ -115,6 +115,10 @@ object HashMap extends ImmutableMapFactory[HashMap] { class HashMap1[A,+B](private[HashMap] var key: A, private[HashMap] var hash: Int, private[HashMap] var value: (B @uncheckedVariance), private[HashMap] var kv: (A,B @uncheckedVariance)) extends HashMap[A,B] { override def size = 1 + private[collection] def getKey = key + private[collection] def getHash = hash + private[collection] def computeHashFor(k: A) = computeHash(k) + override def get0(key: A, hash: Int, level: Int): Option[B] = if (hash == this.hash && key == this.key) Some(value) else None @@ -520,11 +524,23 @@ time { mNew.iterator.foreach( p => ()) } // splits this iterator into 2 iterators // returns the 1st iterator, its number of elements, and the second iterator def split: ((Iterator[(A, B)], Int), Iterator[(A, B)]) = { + def collisionToArray(c: HashMapCollision1[_, _]) = + c.asInstanceOf[HashMapCollision1[A, B]].kvs.toArray map { HashMap() + _ } + def arrayToIterators(arr: Array[HashMap[A, B]]) = { + val (fst, snd) = arr.splitAt(arr.length / 2) + val szsnd = snd.foldLeft(0)(_ + _.size) + ((new TrieIterator(snd), szsnd), new TrieIterator(fst)) + } + def splitArray(ad: Array[HashMap[A, B]]): ((Iterator[(A, B)], Int), Iterator[(A, B)]) = if (ad.length > 1) { + arrayToIterators(ad) + } else ad(0) match { + case c: HashMapCollision1[a, b] => arrayToIterators(collisionToArray(c.asInstanceOf[HashMapCollision1[A, B]])) + case hm: HashTrieMap[a, b] => splitArray(hm.elems.asInstanceOf[Array[HashMap[A, B]]]) + } + // 0) simple case: no elements have been iterated - simply divide arrayD if (arrayD != null && depth == 0 && posD == 0) { - val (fst, snd) = arrayD.splitAt(arrayD.length / 2) - val szfst = fst.foldLeft(0)(_ + _.size) - return ((new TrieIterator(fst), szfst), new TrieIterator(snd)) + return splitArray(arrayD) } // otherwise, some elements have been iterated over @@ -563,13 +579,11 @@ time { mNew.iterator.foreach( p => ()) } if (posD == arrayD.length - 1) { // 3a) positioned at the last element of arrayD val arr: Array[HashMap[A, B]] = arrayD(posD) match { - case c: HashMapCollision1[_, _] => c.asInstanceOf[HashMapCollision1[A, B]].kvs.toArray map { HashMap() + _ } + case c: HashMapCollision1[a, b] => collisionToArray(c).asInstanceOf[Array[HashMap[A, B]]] case ht: HashTrieMap[_, _] => ht.asInstanceOf[HashTrieMap[A, B]].elems case _ => error("cannot divide single element") } - val (fst, snd) = arr.splitAt(arr.length / 2) - val szsnd = snd.foldLeft(0)(_ + _.size) - ((new TrieIterator(snd), szsnd), new TrieIterator(fst)) + arrayToIterators(arr) } else { // 3b) arrayD has more free elements val (fst, snd) = arrayD.splitAt(arrayD.length - (arrayD.length - posD + 1) / 2) diff --git a/src/library/scala/collection/immutable/HashSet.scala b/src/library/scala/collection/immutable/HashSet.scala index 3f04d63b40..7d27531e8d 100644 --- a/src/library/scala/collection/immutable/HashSet.scala +++ b/src/library/scala/collection/immutable/HashSet.scala @@ -70,7 +70,7 @@ class HashSet[A] extends Set[A] h ^ (h >>> 10) } - protected def computeHash(key: A) = improve(elemHashCode(key)) + private[collection] def computeHash(key: A) = improve(elemHashCode(key)) protected def get0(key: A, hash: Int, level: Int): Boolean = false @@ -335,11 +335,23 @@ time { mNew.iterator.foreach( p => ()) } // splits this iterator into 2 iterators // returns the 1st iterator, its number of elements, and the second iterator def split: ((Iterator[A], Int), Iterator[A]) = { + def collisionToArray(c: HashSetCollision1[_]) = + c.asInstanceOf[HashSetCollision1[A]].ks.toList map { HashSet() + _ } toArray + def arrayToIterators(arr: Array[HashSet[A]]) = { + val (fst, snd) = arr.splitAt(arr.length / 2) + val szsnd = snd.foldLeft(0)(_ + _.size) + ((new TrieIterator(snd), szsnd), new TrieIterator(fst)) + } + def splitArray(ad: Array[HashSet[A]]): ((Iterator[A], Int), Iterator[A]) = if (ad.length > 1) { + arrayToIterators(ad) + } else ad(0) match { + case c: HashSetCollision1[a] => arrayToIterators(collisionToArray(c.asInstanceOf[HashSetCollision1[A]])) + case hm: HashTrieSet[a] => splitArray(hm.elems.asInstanceOf[Array[HashSet[A]]]) + } + // 0) simple case: no elements have been iterated - simply divide arrayD if (arrayD != null && depth == 0 && posD == 0) { - val (fst, snd) = arrayD.splitAt(arrayD.length / 2) - val szfst = fst.foldLeft(0)(_ + _.size) - return ((new TrieIterator(fst), szfst), new TrieIterator(snd)) + return splitArray(arrayD) } // otherwise, some elements have been iterated over @@ -378,13 +390,11 @@ time { mNew.iterator.foreach( p => ()) } if (posD == arrayD.length - 1) { // 3a) positioned at the last element of arrayD val arr: Array[HashSet[A]] = arrayD(posD) match { - case c: HashSetCollision1[_] => c.asInstanceOf[HashSetCollision1[A]].ks.toList map { HashSet() + _ } toArray + case c: HashSetCollision1[a] => collisionToArray(c).asInstanceOf[Array[HashSet[A]]] case ht: HashTrieSet[_] => ht.asInstanceOf[HashTrieSet[A]].elems case _ => error("cannot divide single element") } - val (fst, snd) = arr.splitAt(arr.length / 2) - val szsnd = snd.foldLeft(0)(_ + _.size) - ((new TrieIterator(snd), szsnd), new TrieIterator(fst)) + arrayToIterators(arr) } else { // 3b) arrayD has more free elements val (fst, snd) = arrayD.splitAt(arrayD.length - (arrayD.length - posD + 1) / 2) diff --git a/src/library/scala/collection/parallel/ParIterableLike.scala b/src/library/scala/collection/parallel/ParIterableLike.scala index 3d839119b0..3a83140a53 100644 --- a/src/library/scala/collection/parallel/ParIterableLike.scala +++ b/src/library/scala/collection/parallel/ParIterableLike.scala @@ -983,7 +983,11 @@ self => extends Transformer[(Combiner[U, This], Combiner[U, This]), Span[U, This]] { var result: (Combiner[U, This], Combiner[U, This]) = null def leaf(prev: Option[(Combiner[U, This], Combiner[U, This])]) = if (pos < pit.indexFlag) { - result = pit.span2combiners(pred, reuse(prev.map(_._1), cbf()), reuse(prev.map(_._2), cbf())) + // val lst = pit.toList + // val pa = mutable.ParArray(lst: _*) + // val str = "At leaf we will iterate: " + pa.parallelIterator.toList + result = pit.span2combiners(pred, cbf(), cbf()) // do NOT reuse old combiners here, lest ye be surprised + // println("\nAt leaf result is: " + result) if (result._2.size > 0) pit.setIndexFlagIfLesser(pos) } else { result = (reuse(prev.map(_._2), cbf()), pit.copy2builder[U, This, Combiner[U, This]](reuse(prev.map(_._2), cbf()))) diff --git a/src/library/scala/collection/parallel/immutable/ParHashMap.scala b/src/library/scala/collection/parallel/immutable/ParHashMap.scala index 7cc0adbb4f..07caafb417 100644 --- a/src/library/scala/collection/parallel/immutable/ParHashMap.scala +++ b/src/library/scala/collection/parallel/immutable/ParHashMap.scala @@ -82,7 +82,8 @@ self => } def next: (K, V) = { i += 1 - triter.next + val r = triter.next + r } def hasNext: Boolean = { i < sz @@ -90,6 +91,21 @@ self => def remaining = sz - i } + private[parallel] def printDebugInfo { + println("Parallel hash trie") + println("Top level inner trie type: " + trie.getClass) + trie match { + case hm: HashMap.HashMap1[k, v] => + println("single node type") + println("key stored: " + hm.getKey) + println("hash of key: " + hm.getHash) + println("computed hash of " + hm.getKey + ": " + hm.computeHashFor(hm.getKey)) + println("trie.get(key): " + hm.get(hm.getKey)) + case _ => + println("other kind of node") + } + } + } @@ -112,10 +128,11 @@ private[immutable] abstract class HashMapCombiner[K, V] extends collection.parallel.BucketCombiner[(K, V), ParHashMap[K, V], (K, V), HashMapCombiner[K, V]](HashMapCombiner.rootsize) { self: EnvironmentPassingCombiner[(K, V), ParHashMap[K, V]] => import HashMapCombiner._ + val emptyTrie = HashMap.empty[K, V] def +=(elem: (K, V)) = { sz += 1 - val hc = elem._1.## + val hc = emptyTrie.computeHash(elem._1) val pos = hc & 0x1f if (lasts(pos) eq null) { // initialize bucket @@ -149,6 +166,10 @@ self: EnvironmentPassingCombiner[(K, V), ParHashMap[K, V]] => } } + override def toString = { + "HashTrieCombiner(buckets:\n\t" + heads.filter(_ != null).mkString("\n\t") + ")\n" + } + /* tasks */ class CreateTrie(buckets: Array[Unrolled[(K, V)]], root: Array[HashMap[K, V]], offset: Int, howmany: Int) extends super.Task[Unit, CreateTrie] { @@ -171,7 +192,7 @@ self: EnvironmentPassingCombiner[(K, V), ParHashMap[K, V]] => val chunksz = unrolled.size while (i < chunksz) { val kv = chunkarr(i) - val hc = kv._1.## + val hc = trie.computeHash(kv._1) trie = trie.updated0(kv._1, hc, rootbits, kv._2, kv, null) i += 1 } diff --git a/src/library/scala/collection/parallel/immutable/ParHashSet.scala b/src/library/scala/collection/parallel/immutable/ParHashSet.scala index c9554ae1eb..33e2e7102a 100644 --- a/src/library/scala/collection/parallel/immutable/ParHashSet.scala +++ b/src/library/scala/collection/parallel/immutable/ParHashSet.scala @@ -106,10 +106,11 @@ private[immutable] abstract class HashSetCombiner[T] extends collection.parallel.BucketCombiner[T, ParHashSet[T], Any, HashSetCombiner[T]](HashSetCombiner.rootsize) { self: EnvironmentPassingCombiner[T, ParHashSet[T]] => import HashSetCombiner._ + val emptyTrie = HashSet.empty[T] def +=(elem: T) = { sz += 1 - val hc = elem.## + val hc = emptyTrie.computeHash(elem) val pos = hc & 0x1f if (lasts(pos) eq null) { // initialize bucket @@ -166,7 +167,7 @@ self: EnvironmentPassingCombiner[T, ParHashSet[T]] => val chunksz = unrolled.size while (i < chunksz) { val v = chunkarr(i).asInstanceOf[T] - val hc = v.## + val hc = trie.computeHash(v) trie = trie.updated0(v, hc, rootbits) i += 1 } diff --git a/src/library/scala/collection/parallel/package.scala b/src/library/scala/collection/parallel/package.scala index bb53dcdaaa..de3a790727 100644 --- a/src/library/scala/collection/parallel/package.scala +++ b/src/library/scala/collection/parallel/package.scala @@ -116,7 +116,7 @@ package object parallel { unrolled = unrolled.next } } - override def toString = array.mkString("Unrolled(", ", ", ")") + override def toString = array.take(size).mkString("Unrolled(", ", ", ")") + (if (next ne null) next.toString else "") } /** A helper iterator for iterating very small array buffers. -- cgit v1.2.3