Some serious hash tries bugs fixed.

Plus one wild goose chase and test fixes. No review.
author: Aleksandar Pokopec <aleksandar.prokopec@epfl.ch> 2010-10-20 20:20:08 +0000
committer: Aleksandar Pokopec <aleksandar.prokopec@epfl.ch> 2010-10-20 20:20:08 +0000
commit: b689b912caa0e1bcd981c1a0092857ac83fb1e1d (patch)
tree: 194b115ae192b472baf904de14f7c61bce6af915 /src
parent: b1d9354a08107afab9c15d2ae1b8dded82b5ffef (diff)
download: scala-b689b912caa0e1bcd981c1a0092857ac83fb1e1d.tar.gz
scala-b689b912caa0e1bcd981c1a0092857ac83fb1e1d.tar.bz2
scala-b689b912caa0e1bcd981c1a0092857ac83fb1e1d.zip
7 files changed, 75 insertions, 24 deletions
diff --git a/src/library/scala/collection/MapLike.scala b/src/library/scala/collection/MapLike.scala
index ebf95dde81..ae1d2cc6e8 100644
--- a/src/library/scala/collection/MapLike.scala
+++ b/src/library/scala/collection/MapLike.scala
@@ -347,7 +347,8 @@ self =>
       try {
         this forall {
           case (k, v) => that.get(k.asInstanceOf[b]) match {
-            case Some(`v`) => true
+            case Some(`v`) =>
+              true
             case _ => false
           }
         }
diff --git a/src/library/scala/collection/immutable/HashMap.scala b/src/library/scala/collection/immutable/HashMap.scala
index 684f9bced4..32542b9359 100644
--- a/src/library/scala/collection/immutable/HashMap.scala
+++ b/src/library/scala/collection/immutable/HashMap.scala
@@ -71,7 +71,7 @@ class HashMap[A, +B] extends Map[A,B] with MapLike[A, B, HashMap[A, B]] with Par
     h ^ (h >>> 10)
   }
 
-  protected def computeHash(key: A) = improve(elemHashCode(key))
+  private[collection] def computeHash(key: A) = improve(elemHashCode(key))
 
   protected type Merger[B1] = ((A, B1), (A, B1)) => (A, B1)
 
@@ -115,6 +115,10 @@ object HashMap extends ImmutableMapFactory[HashMap] {
   class HashMap1[A,+B](private[HashMap] var key: A, private[HashMap] var hash: Int, private[HashMap] var value: (B @uncheckedVariance), private[HashMap] var kv: (A,B @uncheckedVariance)) extends HashMap[A,B] {
     override def size = 1
 
+    private[collection] def getKey = key
+    private[collection] def getHash = hash
+    private[collection] def computeHashFor(k: A) = computeHash(k)
+
     override def get0(key: A, hash: Int, level: Int): Option[B] =
       if (hash == this.hash && key == this.key) Some(value) else None
 
@@ -520,11 +524,23 @@ time { mNew.iterator.foreach( p => ()) }
     // splits this iterator into 2 iterators
     // returns the 1st iterator, its number of elements, and the second iterator
     def split: ((Iterator[(A, B)], Int), Iterator[(A, B)]) = {
+      def collisionToArray(c: HashMapCollision1[_, _]) =
+        c.asInstanceOf[HashMapCollision1[A, B]].kvs.toArray map { HashMap() + _ }
+      def arrayToIterators(arr: Array[HashMap[A, B]]) = {
+        val (fst, snd) = arr.splitAt(arr.length / 2)
+        val szsnd = snd.foldLeft(0)(_ + _.size)
+        ((new TrieIterator(snd), szsnd), new TrieIterator(fst))
+      }
+      def splitArray(ad: Array[HashMap[A, B]]): ((Iterator[(A, B)], Int), Iterator[(A, B)]) = if (ad.length > 1) {
+        arrayToIterators(ad)
+      } else ad(0) match {
+        case c: HashMapCollision1[a, b] => arrayToIterators(collisionToArray(c.asInstanceOf[HashMapCollision1[A, B]]))
+        case hm: HashTrieMap[a, b] => splitArray(hm.elems.asInstanceOf[Array[HashMap[A, B]]])
+      }
+
       // 0) simple case: no elements have been iterated - simply divide arrayD
       if (arrayD != null && depth == 0 && posD == 0) {
-        val (fst, snd) = arrayD.splitAt(arrayD.length / 2)
-        val szfst = fst.foldLeft(0)(_ + _.size)
-        return ((new TrieIterator(fst), szfst), new TrieIterator(snd))
+        return splitArray(arrayD)
       }
 
       // otherwise, some elements have been iterated over
@@ -563,13 +579,11 @@ time { mNew.iterator.foreach( p => ()) }
           if (posD == arrayD.length - 1) {
             // 3a) positioned at the last element of arrayD
             val arr: Array[HashMap[A, B]] = arrayD(posD) match {
-              case c: HashMapCollision1[_, _] => c.asInstanceOf[HashMapCollision1[A, B]].kvs.toArray map { HashMap() + _ }
+              case c: HashMapCollision1[a, b] => collisionToArray(c).asInstanceOf[Array[HashMap[A, B]]]
               case ht: HashTrieMap[_, _] => ht.asInstanceOf[HashTrieMap[A, B]].elems
               case _ => error("cannot divide single element")
             }
-            val (fst, snd) = arr.splitAt(arr.length / 2)
-            val szsnd = snd.foldLeft(0)(_ + _.size)
-            ((new TrieIterator(snd), szsnd), new TrieIterator(fst))
+            arrayToIterators(arr)
           } else {
             // 3b) arrayD has more free elements
             val (fst, snd) = arrayD.splitAt(arrayD.length - (arrayD.length - posD + 1) / 2)
diff --git a/src/library/scala/collection/immutable/HashSet.scala b/src/library/scala/collection/immutable/HashSet.scala
index 3f04d63b40..7d27531e8d 100644
--- a/src/library/scala/collection/immutable/HashSet.scala
+++ b/src/library/scala/collection/immutable/HashSet.scala
@@ -70,7 +70,7 @@ class HashSet[A] extends Set[A]
     h ^ (h >>> 10)
   }
 
-  protected def computeHash(key: A) = improve(elemHashCode(key))
+  private[collection] def computeHash(key: A) = improve(elemHashCode(key))
 
   protected def get0(key: A, hash: Int, level: Int): Boolean = false
 
@@ -335,11 +335,23 @@ time { mNew.iterator.foreach( p => ()) }
     // splits this iterator into 2 iterators
     // returns the 1st iterator, its number of elements, and the second iterator
     def split: ((Iterator[A], Int), Iterator[A]) = {
+      def collisionToArray(c: HashSetCollision1[_]) =
+        c.asInstanceOf[HashSetCollision1[A]].ks.toList map { HashSet() + _ } toArray
+      def arrayToIterators(arr: Array[HashSet[A]]) = {
+        val (fst, snd) = arr.splitAt(arr.length / 2)
+        val szsnd = snd.foldLeft(0)(_ + _.size)
+        ((new TrieIterator(snd), szsnd), new TrieIterator(fst))
+      }
+      def splitArray(ad: Array[HashSet[A]]): ((Iterator[A], Int), Iterator[A]) = if (ad.length > 1) {
+        arrayToIterators(ad)
+      } else ad(0) match {
+        case c: HashSetCollision1[a] => arrayToIterators(collisionToArray(c.asInstanceOf[HashSetCollision1[A]]))
+        case hm: HashTrieSet[a] => splitArray(hm.elems.asInstanceOf[Array[HashSet[A]]])
+      }
+
       // 0) simple case: no elements have been iterated - simply divide arrayD
       if (arrayD != null && depth == 0 && posD == 0) {
-        val (fst, snd) = arrayD.splitAt(arrayD.length / 2)
-        val szfst = fst.foldLeft(0)(_ + _.size)
-        return ((new TrieIterator(fst), szfst), new TrieIterator(snd))
+        return splitArray(arrayD)
       }
 
       // otherwise, some elements have been iterated over
@@ -378,13 +390,11 @@ time { mNew.iterator.foreach( p => ()) }
           if (posD == arrayD.length - 1) {
             // 3a) positioned at the last element of arrayD
             val arr: Array[HashSet[A]] = arrayD(posD) match {
-              case c: HashSetCollision1[_] => c.asInstanceOf[HashSetCollision1[A]].ks.toList map { HashSet() + _ } toArray
+              case c: HashSetCollision1[a] => collisionToArray(c).asInstanceOf[Array[HashSet[A]]]
               case ht: HashTrieSet[_] => ht.asInstanceOf[HashTrieSet[A]].elems
               case _ => error("cannot divide single element")
             }
-            val (fst, snd) = arr.splitAt(arr.length / 2)
-            val szsnd = snd.foldLeft(0)(_ + _.size)
-            ((new TrieIterator(snd), szsnd), new TrieIterator(fst))
+            arrayToIterators(arr)
           } else {
             // 3b) arrayD has more free elements
             val (fst, snd) = arrayD.splitAt(arrayD.length - (arrayD.length - posD + 1) / 2)
diff --git a/src/library/scala/collection/parallel/ParIterableLike.scala b/src/library/scala/collection/parallel/ParIterableLike.scala
index 3d839119b0..3a83140a53 100644
--- a/src/library/scala/collection/parallel/ParIterableLike.scala
+++ b/src/library/scala/collection/parallel/ParIterableLike.scala
@@ -983,7 +983,11 @@ self =>
   extends Transformer[(Combiner[U, This], Combiner[U, This]), Span[U, This]] {
     var result: (Combiner[U, This], Combiner[U, This]) = null
     def leaf(prev: Option[(Combiner[U, This], Combiner[U, This])]) = if (pos < pit.indexFlag) {
-      result = pit.span2combiners(pred, reuse(prev.map(_._1), cbf()), reuse(prev.map(_._2), cbf()))
+      // val lst = pit.toList
+      // val pa = mutable.ParArray(lst: _*)
+      // val str = "At leaf we will iterate: " + pa.parallelIterator.toList
+      result = pit.span2combiners(pred, cbf(), cbf()) // do NOT reuse old combiners here, lest ye be surprised
+      // println("\nAt leaf result is: " + result)
       if (result._2.size > 0) pit.setIndexFlagIfLesser(pos)
     } else {
       result = (reuse(prev.map(_._2), cbf()), pit.copy2builder[U, This, Combiner[U, This]](reuse(prev.map(_._2), cbf())))
diff --git a/src/library/scala/collection/parallel/immutable/ParHashMap.scala b/src/library/scala/collection/parallel/immutable/ParHashMap.scala
index 7cc0adbb4f..07caafb417 100644
--- a/src/library/scala/collection/parallel/immutable/ParHashMap.scala
+++ b/src/library/scala/collection/parallel/immutable/ParHashMap.scala
@@ -82,7 +82,8 @@ self =>
     }
     def next: (K, V) = {
       i += 1
-      triter.next
+      val r = triter.next
+      r
     }
     def hasNext: Boolean = {
       i < sz
@@ -90,6 +91,21 @@ self =>
     def remaining = sz - i
   }
 
+  private[parallel] def printDebugInfo {
+    println("Parallel hash trie")
+    println("Top level inner trie type: " + trie.getClass)
+    trie match {
+      case hm: HashMap.HashMap1[k, v] =>
+        println("single node type")
+        println("key stored: " + hm.getKey)
+        println("hash of key: " + hm.getHash)
+        println("computed hash of " + hm.getKey + ": " + hm.computeHashFor(hm.getKey))
+        println("trie.get(key): " + hm.get(hm.getKey))
+      case _ =>
+        println("other kind of node")
+    }
+  }
+
 }
 
 
@@ -112,10 +128,11 @@ private[immutable] abstract class HashMapCombiner[K, V]
 extends collection.parallel.BucketCombiner[(K, V), ParHashMap[K, V], (K, V), HashMapCombiner[K, V]](HashMapCombiner.rootsize) {
 self: EnvironmentPassingCombiner[(K, V), ParHashMap[K, V]] =>
   import HashMapCombiner._
+  val emptyTrie = HashMap.empty[K, V]
 
   def +=(elem: (K, V)) = {
     sz += 1
-    val hc = elem._1.##
+    val hc = emptyTrie.computeHash(elem._1)
     val pos = hc & 0x1f
     if (lasts(pos) eq null) {
       // initialize bucket
@@ -149,6 +166,10 @@ self: EnvironmentPassingCombiner[(K, V), ParHashMap[K, V]] =>
     }
   }
 
+  override def toString = {
+    "HashTrieCombiner(buckets:\n\t" + heads.filter(_ != null).mkString("\n\t") + ")\n"
+  }
+
   /* tasks */
 
   class CreateTrie(buckets: Array[Unrolled[(K, V)]], root: Array[HashMap[K, V]], offset: Int, howmany: Int) extends super.Task[Unit, CreateTrie] {
@@ -171,7 +192,7 @@ self: EnvironmentPassingCombiner[(K, V), ParHashMap[K, V]] =>
         val chunksz = unrolled.size
         while (i < chunksz) {
           val kv = chunkarr(i)
-          val hc = kv._1.##
+          val hc = trie.computeHash(kv._1)
           trie = trie.updated0(kv._1, hc, rootbits, kv._2, kv, null)
           i += 1
         }
diff --git a/src/library/scala/collection/parallel/immutable/ParHashSet.scala b/src/library/scala/collection/parallel/immutable/ParHashSet.scala
index c9554ae1eb..33e2e7102a 100644
--- a/src/library/scala/collection/parallel/immutable/ParHashSet.scala
+++ b/src/library/scala/collection/parallel/immutable/ParHashSet.scala
@@ -106,10 +106,11 @@ private[immutable] abstract class HashSetCombiner[T]
 extends collection.parallel.BucketCombiner[T, ParHashSet[T], Any, HashSetCombiner[T]](HashSetCombiner.rootsize) {
 self: EnvironmentPassingCombiner[T, ParHashSet[T]] =>
   import HashSetCombiner._
+  val emptyTrie = HashSet.empty[T]
 
   def +=(elem: T) = {
     sz += 1
-    val hc = elem.##
+    val hc = emptyTrie.computeHash(elem)
     val pos = hc & 0x1f
     if (lasts(pos) eq null) {
       // initialize bucket
@@ -166,7 +167,7 @@ self: EnvironmentPassingCombiner[T, ParHashSet[T]] =>
         val chunksz = unrolled.size
         while (i < chunksz) {
           val v = chunkarr(i).asInstanceOf[T]
-          val hc = v.##
+          val hc = trie.computeHash(v)
           trie = trie.updated0(v, hc, rootbits)
           i += 1
         }
diff --git a/src/library/scala/collection/parallel/package.scala b/src/library/scala/collection/parallel/package.scala
index bb53dcdaaa..de3a790727 100644
--- a/src/library/scala/collection/parallel/package.scala
+++ b/src/library/scala/collection/parallel/package.scala
@@ -116,7 +116,7 @@ package object parallel {
         unrolled = unrolled.next
       }
     }
-    override def toString = array.mkString("Unrolled(", ", ", ")")
+    override def toString = array.take(size).mkString("Unrolled(", ", ", ")") + (if (next ne null) next.toString else "")
   }
 
   /** A helper iterator for iterating very small array buffers.
author	Aleksandar Pokopec <aleksandar.prokopec@epfl.ch>	2010-10-20 20:20:08 +0000
committer	Aleksandar Pokopec <aleksandar.prokopec@epfl.ch>	2010-10-20 20:20:08 +0000
commit	b689b912caa0e1bcd981c1a0092857ac83fb1e1d (patch)
tree	194b115ae192b472baf904de14f7c61bce6af915 /src
parent	b1d9354a08107afab9c15d2ae1b8dded82b5ffef (diff)
download	scala-b689b912caa0e1bcd981c1a0092857ac83fb1e1d.tar.gz scala-b689b912caa0e1bcd981c1a0092857ac83fb1e1d.tar.bz2 scala-b689b912caa0e1bcd981c1a0092857ac83fb1e1d.zip