diff options
author | Aleksandar Pokopec <aleksandar.prokopec@epfl.ch> | 2010-06-09 07:56:13 +0000 |
---|---|---|
committer | Aleksandar Pokopec <aleksandar.prokopec@epfl.ch> | 2010-06-09 07:56:13 +0000 |
commit | 7aae8c7cbcd272f56629ada19623316399ca397f (patch) | |
tree | e95467de73a6af7b46484440769134d32346a05a | |
parent | e045a3ff33793e1d5bb61797f1afdcc3aba9e4c7 (diff) | |
download | scala-7aae8c7cbcd272f56629ada19623316399ca397f.tar.gz scala-7aae8c7cbcd272f56629ada19623316399ca397f.tar.bz2 scala-7aae8c7cbcd272f56629ada19623316399ca397f.zip |
Added `combine` and `split` to immutable.HashMap.
Under test/benchmarks there is a `bench` script to run benchmarks - it can be invoked after running building the library.
Review by rompf.
13 files changed, 431 insertions, 23 deletions
diff --git a/src/library/scala/collection/immutable/HashMap.scala b/src/library/scala/collection/immutable/HashMap.scala index 01ef597d24..09620938ca 100644 --- a/src/library/scala/collection/immutable/HashMap.scala +++ b/src/library/scala/collection/immutable/HashMap.scala @@ -76,9 +76,12 @@ class HashMap[A, +B] extends Map[A,B] with MapLike[A, B, HashMap[A, B]] { protected def removed0(key: A, hash: Int, level: Int): HashMap[A, B] = this - protected def writeReplace(): AnyRef = new HashMap.SerializationProxy(this) + def split: Seq[HashMap[A, B]] = Seq(this) + + def combine[B1 >: B](that: HashMap[A, B1]): HashMap[A, B1] = that + } /** $factoryInfo @@ -124,6 +127,7 @@ object HashMap extends ImmutableMapFactory[HashMap] { override def iterator: Iterator[(A,B)] = Iterator(ensurePair) override def foreach[U](f: ((A, B)) => U): Unit = f(ensurePair) private[HashMap] def ensurePair: (A,B) = if (kv ne null) kv else { kv = (key, value); kv } + override def combine[B1 >: B](that: HashMap[A, B1]): HashMap[A, B1] = that + (ensurePair) } private class HashMapCollision1[A,+B](private[HashMap] var hash: Int, var kvs: ListMap[A,B @uncheckedVariance]) extends HashMap[A,B] { @@ -153,11 +157,22 @@ object HashMap extends ImmutableMapFactory[HashMap] { override def iterator: Iterator[(A,B)] = kvs.iterator override def foreach[U](f: ((A, B)) => U): Unit = kvs.foreach(f) + override def split: Seq[HashMap[A, B]] = { + val (x, y) = kvs.splitAt(kvs.size / 2) + def newhm(lm: ListMap[A, B @uncheckedVariance]) = new HashMapCollision1(hash, lm) + List(newhm(x), newhm(y)) + } + override def combine[B1 >: B](that: HashMap[A, B1]): HashMap[A, B1] = { + // TODO this can be made more efficient by passing the entire ListMap at once + var m = that + for ((k, v) <- kvs) m = m.updated0(k, this.hash, 0, v, null) + m + } } - - class HashTrieMap[A,+B](private var bitmap: Int, private var elems: Array[HashMap[A,B @uncheckedVariance]], - private var size0: Int) extends HashMap[A,B] { + var dives = 0 + class HashTrieMap[A,+B](private[HashMap] var bitmap: Int, private[HashMap] var elems: Array[HashMap[A,B @uncheckedVariance]], + private[HashMap] var size0: Int) extends HashMap[A,B] { /* def this (level: Int, m1: HashMap1[A,B], m2: HashMap1[A,B]) = { this(((m1.hash >>> level) & 0x1f) | ((m2.hash >>> level) & 0x1f), { @@ -346,6 +361,109 @@ time { mNew.iterator.foreach( p => ()) } } } + private def printBitmap(bm: Int) { + var i = 31 + var b = bm + while (i != 0) { + print((b & 1) + " ") + b = b >>> 1 + i -= 1 + } + println + } + + private def subtreeCount(bm: Int) = { + var c = 0 + var b = bm + while (b != 0) { + if ((b & 1) != 0) c += 1 + b = b >>> 1 + } + c + } + + private def posOf(n: Int, bm: Int) = { + var left = n + var i = -1 + var b = bm + while (left >= 0) { + i += 1 + if ((b & 1) != 0) left -= 1 + b = b >>> 1 + } + i + } + + override def split: Seq[HashMap[A, B]] = { + // printBitmap(bitmap) + // println(elems.toList) + + // println("subtrees: " + subtreeCount(bitmap)) + // println("will split at: " + posOf(subtreeCount(bitmap) / 2, bitmap)) + val splitpoint = posOf(subtreeCount(bitmap) / 2, bitmap) + val bm1 = bitmap & (-1 << splitpoint) + val bm2 = bitmap & (-1 >>> (32 - splitpoint)) + // printBitmap(bm1) + // printBitmap(bm2) + val (e1, e2) = elems.splitAt(splitpoint) + // println(e1.toList) + // println(e2.toList) + val hm1 = new HashTrieMap(bm1, e1, e1.foldLeft(0)(_ + _.size)) + val hm2 = new HashTrieMap(bm2, e2, e2.foldLeft(0)(_ + _.size)) + + List(hm1, hm2) + } + + override def combine[B1 >: B](that: HashMap[A, B1]): HashMap[A, B1] = that match { + case hm: HashMap1[_, _] => this ++ hm.asInstanceOf[HashMap[A, B1]] + case hm: HashMapCollision1[_, _] => this ++ hm.asInstanceOf[HashMap[A, B1]] + case hm: HashTrieMap[_, _] => + val that = hm.asInstanceOf[HashTrieMap[A, B1]] + val thiselems = this.elems + val thatelems = that.elems + val thisbm = this.bitmap + val thatbm = that.bitmap + val combinedbitmap = thisbm | thatbm + val bothbitmap = thisbm & thatbm + + // determine the necessary size for the array + val subcount = subtreeCount(combinedbitmap) + //printBitmap(combinedbitmap) + //println(subcount) + + // construct a new array of appropriate size + val combined = new Array[HashMap[A, B1 @uncheckedVariance]](subcount) + + // run through both bitmaps and add elements to it + var pos = 1 + var i = 0 + var thisi = 0 + var thati = 0 + while (pos != 0) { + if ((bothbitmap & pos) != 0) { + combined(i) = thiselems(thisi) combine thatelems(thati) + dives += 1 + i += 1 + thisi += 1 + thati += 1 + } else if ((thisbm & pos) != 0) { + combined(i) = thiselems(thisi) + i += 1 + thisi += 1 + } else if ((thatbm & pos) != 0) { + combined(i) = thatelems(thati) + i += 1 + thati += 1 + } + pos = pos << 1 + } + //println(combined.toList) + + new HashTrieMap[A, B1](combinedbitmap, combined, this.size + that.size) + case empty: HashMap[_, _] => this + case _ => error("section supposed to be unreachable.") + } + } @serializable @SerialVersionUID(2L) private class SerializationProxy[A,B](@transient private var orig: HashMap[A, B]) { diff --git a/src/parallel-collections/scala/collection/generic/GenericParallelTemplate.scala b/src/parallel-collections/scala/collection/generic/GenericParallelTemplate.scala index 196bc2fcd4..58454be04e 100644 --- a/src/parallel-collections/scala/collection/generic/GenericParallelTemplate.scala +++ b/src/parallel-collections/scala/collection/generic/GenericParallelTemplate.scala @@ -1,29 +1,36 @@ package scala.collection.generic + import scala.collection.parallel.Combiner import scala.collection.parallel.ParallelIterable import scala.collection.parallel.TaskSupport +import annotation.unchecked.uncheckedVariance + + + -/** - * A template trait for collections having a companion. + +/** A template trait for collections having a companion. * - * @tparam A the element type of the collection - * @tparam CC the type constructor representing the collection class - * @since 2.8 + * @tparam A the element type of the collection + * @tparam CC the type constructor representing the collection class + * @since 2.8 + * @author prokopec */ trait GenericParallelTemplate[+A, +CC[X] <: ParallelIterable[X]] extends GenericTraversableTemplate[A, CC] + with HasNewCombiner[A, CC[A] @uncheckedVariance] with TaskSupport { def companion: GenericCompanion[CC] with GenericParallelCompanion[CC] - override protected[this] def newBuilder: collection.mutable.Builder[A, CC[A]] = newCombiner + protected[this] override def newBuilder: collection.mutable.Builder[A, CC[A]] = newCombiner - protected[this] def newCombiner: Combiner[A, CC[A]] = { + protected[this] override def newCombiner: Combiner[A, CC[A]] = { val cb = companion.newCombiner[A] cb.environment = environment cb @@ -44,3 +51,5 @@ extends GenericTraversableTemplate[A, CC] + + diff --git a/src/parallel-collections/scala/collection/generic/HasNewCombiner.scala b/src/parallel-collections/scala/collection/generic/HasNewCombiner.scala new file mode 100644 index 0000000000..2c24b437d8 --- /dev/null +++ b/src/parallel-collections/scala/collection/generic/HasNewCombiner.scala @@ -0,0 +1,26 @@ +package scala.collection.generic + + + +import scala.collection.parallel.Combiner + + + +trait HasNewCombiner[+T, +Repr] { + protected[this] def newCombiner: Combiner[T, Repr] +} + + + + + + + + + + + + + + + diff --git a/src/parallel-collections/scala/collection/generic/ParallelFactory.scala b/src/parallel-collections/scala/collection/generic/ParallelFactory.scala index f7144b0423..86a5fdf822 100644 --- a/src/parallel-collections/scala/collection/generic/ParallelFactory.scala +++ b/src/parallel-collections/scala/collection/generic/ParallelFactory.scala @@ -6,12 +6,11 @@ import scala.collection.parallel.Combiner -/** - * A template class for companion objects of `ParallelIterable` and subclasses thereof. - * This class extends `TraversableFactory` and provides a set of operations to create `$Coll` objects. +/** A template class for companion objects of `ParallelIterable` and subclasses thereof. + * This class extends `TraversableFactory` and provides a set of operations to create `$Coll` objects. * - * @define $coll collection - * @define $Coll Parallel + * @define $coll parallel collection + * @define $Coll ParallelIterable */ abstract class ParallelFactory[CC[X] <: ParallelIterable[X] with GenericParallelTemplate[X, CC]] extends TraversableFactory[CC] diff --git a/src/parallel-collections/scala/collection/generic/ParallelMapFactory.scala b/src/parallel-collections/scala/collection/generic/ParallelMapFactory.scala new file mode 100644 index 0000000000..ceda9d1155 --- /dev/null +++ b/src/parallel-collections/scala/collection/generic/ParallelMapFactory.scala @@ -0,0 +1,39 @@ +package scala.collection.generic + + + +import scala.collection.parallel.ParallelMap +import scala.collection.parallel.ParallelMapLike +import scala.collection.parallel.Combiner +import scala.collection.mutable.Builder + + + + +/** A template class for companion objects of `ParallelMap` and subclasses thereof. + * This class extends `TraversableFactory` and provides a set of operations to create `$Coll` objects. + * + * @define $coll parallel map + * @define $Coll ParallelMap + */ +abstract class ParallelMapFactory[CC[X, Y] <: ParallelMap[X, Y] with ParallelMapLike[X, Y, CC[X, Y], _]] +extends MapFactory[CC] { + + /** The default builder for $Coll objects. + * @tparam K the type of the keys + * @tparam V the type of the associated values + */ + override def newBuilder[K, V]: Builder[(K, V), CC[K, V]] = newCombiner[K, V] + + /** The default combiner for $Coll objects. + * @tparam K the type of the keys + * @tparam V the type of the associated values + */ + def newCombiner[K, V]: Combiner[(K, V), CC[K, V]] = null // TODO + + class ParallelMapCanBuildFrom[K, V] extends CanBuildFromParallel[CC[_, _], (K, V), CC[K, V]] { + def apply(from: CC[_, _]) = newCombiner[K, V] + def apply() = newCombiner[K, V] + } + +} diff --git a/src/parallel-collections/scala/collection/parallel/ParallelIterableLike.scala b/src/parallel-collections/scala/collection/parallel/ParallelIterableLike.scala index bf5dbeb5f9..5ed6d10195 100644 --- a/src/parallel-collections/scala/collection/parallel/ParallelIterableLike.scala +++ b/src/parallel-collections/scala/collection/parallel/ParallelIterableLike.scala @@ -120,6 +120,7 @@ extends IterableLike[T, Repr] with Parallelizable[T, Repr] with Sequentializable[T, SequentialView] with Parallel + with HasNewCombiner[T, Repr] with TaskSupport { self => @@ -163,11 +164,6 @@ extends IterableLike[T, Repr] */ type SCPI <: SignalContextPassingIterator[ParallelIterator] - /** Creates a new parallel builder working on the given pool. - * @return a new parallel builder. - */ - protected[this] def newCombiner: Combiner[T, Repr] - /** Creates a new parallel iterator used to traverse the elements of this parallel collection. * This iterator is more specific than the iterator of the returned by `iterator`, and augmented * with additional accessor and transformer methods. diff --git a/src/parallel-collections/scala/collection/parallel/ParallelMap.scala b/src/parallel-collections/scala/collection/parallel/ParallelMap.scala new file mode 100644 index 0000000000..c03f14b198 --- /dev/null +++ b/src/parallel-collections/scala/collection/parallel/ParallelMap.scala @@ -0,0 +1,61 @@ +package scala.collection.parallel + + + + + +import scala.collection.Map +import scala.collection.mutable.Builder +import scala.collection.generic.ParallelMapFactory +import scala.collection.generic.CanBuildFromParallel + + + + + + +trait ParallelMap[K, +V] +extends Map[K, V] + with ParallelIterable[(K, V)] + with ParallelMapLike[K, V, ParallelMap[K, V], Map[K, V]] +{ self => + + override def empty: ParallelMap[K, V] = null // TODO + +} + + + +object ParallelMap extends ParallelMapFactory[ParallelMap] { + def empty[A, B]: ParallelMap[A, B] = null // TODO + + implicit def canBuildFrom[A, B]: CanBuildFromParallel[Coll, (A, B), Map[A, B]] = null // TODO +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/parallel-collections/scala/collection/parallel/ParallelMapLike.scala b/src/parallel-collections/scala/collection/parallel/ParallelMapLike.scala new file mode 100644 index 0000000000..eddc2963fa --- /dev/null +++ b/src/parallel-collections/scala/collection/parallel/ParallelMapLike.scala @@ -0,0 +1,43 @@ +package scala.collection.parallel + + + + +import scala.collection.MapLike +import scala.collection.Map +import scala.collection.mutable.Builder + + + + + + + + +trait ParallelMapLike[K, + +V, + +Repr <: ParallelMapLike[K, V, Repr, SequentialView] with ParallelMap[K, V], + +SequentialView <: Map[K, V]] +extends MapLike[K, V, Repr] + with ParallelIterableLike[(K, V), Repr, SequentialView] +{ self => + + protected[this] override def newBuilder: Builder[(K, V), Repr] = null // TODO + + protected[this] override def newCombiner: Combiner[(K, V), Repr] = null // TODO + + override def empty: Repr + +} + + + + + + + + + + + + diff --git a/src/parallel-collections/scala/collection/parallel/immutable/ParallelHashTrie.scala b/src/parallel-collections/scala/collection/parallel/immutable/ParallelHashTrie.scala new file mode 100644 index 0000000000..be379d9e5e --- /dev/null +++ b/src/parallel-collections/scala/collection/parallel/immutable/ParallelHashTrie.scala @@ -0,0 +1,60 @@ +package scala.collection.parallel.immutable + + + + + + + +import scala.collection.parallel.ParallelMap +import scala.collection.parallel.ParallelMapLike +import scala.collection.immutable.HashMap + + + + + + + + +class ParallelHashTrie[K, +V] +extends ParallelMap[K, V] + with ParallelMapLike[K, V, ParallelHashTrie[K, V], HashMap[K, V]] +{ self => + + override def empty: ParallelHashTrie[K, V] = null // TODO + + def parallelIterator = null // TODO + + def seq = null // TODO + + def -(k: K) = null // TODO + + def +[U >: V](kv: (K, U)) = null // TODO + + def get(k: K) = None // TODO + +} + + + + + +object ParallelHashTrie { + +} + + + + + + + + + + + + + + + diff --git a/test/benchmarks/src/scala/collection/parallel/Benchmarking.scala b/test/benchmarks/src/scala/collection/parallel/Benchmarking.scala index 371ebccef4..f6aa63cb1a 100644 --- a/test/benchmarks/src/scala/collection/parallel/Benchmarking.scala +++ b/test/benchmarks/src/scala/collection/parallel/Benchmarking.scala @@ -92,6 +92,7 @@ trait BenchmarkRegister { register(hashtries.Iterate) register(hashtries.Construct) register(hashtries.Lookup) + register(hashtries.Combine) } diff --git a/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/Combine.scala b/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/Combine.scala new file mode 100644 index 0000000000..8c8d17e745 --- /dev/null +++ b/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/Combine.scala @@ -0,0 +1,55 @@ +package scala.collection.parallel.benchmarks +package hashtries + + + + +import collection.immutable.{HashMap => HashTrie} +import collection.mutable.HashMap + + + + + + +class Combine(val size: Int, val parallelism: Int, val runWhat: String) extends Bench with IntInit { + var thattrie = new HashTrie[Int, Int] + for (i <- size until 2 * size) thattrie += ((i, i)) + val thatmap = new HashMap[Int, Int] + for (i <- size until 2 * size) thatmap += ((i, i)) + + def runpar = throw new UnsupportedOperationException + def runseq = runhashtrie + def runhashtrie = hashtrie combine thattrie + def runappendtrie = hashtrie ++ thattrie + def runhashmap = hashmap ++ thatmap + def companion = Combine + def comparisonMap = Map("hashtrie" -> runhashtrie _, "hashmap" -> runhashmap _, "appendtrie" -> runappendtrie _) + override def reset = runWhat match { + case "appendtrie" => initHashTrie + case _ => super.reset + } +} + + +object Combine extends BenchCompanion { + def collectionName = "HashTrie" + def benchName = "combine"; + def apply(sz: Int, p: Int, what: String) = new Combine(sz, p, what) + override def defaultSize = 5000 +} + + + + + + + + + + + + + + + diff --git a/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/Foreach.scala b/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/Foreach.scala index 6002b5d8b8..f53ea02e36 100644 --- a/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/Foreach.scala +++ b/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/Foreach.scala @@ -14,7 +14,7 @@ import collection.mutable.HashMap class Foreach(val size: Int, val parallelism: Int, val runWhat: String) extends Bench with IntInit { def runpar = throw new UnsupportedOperationException - def runseq = throw new UnsupportedOperationException + def runseq = runhashtrie def runhashmap = hashmap.foreach(n => ()) def runhashtrie = hashtrie.foreach(n => ()) def companion = Foreach diff --git a/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/IntInit.scala b/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/IntInit.scala index 3328dcbe0f..dbbe64e290 100644 --- a/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/IntInit.scala +++ b/test/benchmarks/src/scala/collection/parallel/benchmarks/hashtries/IntInit.scala @@ -17,6 +17,7 @@ trait IntInit extends Bench { def reset = runWhat match { case "hashmap" => initHashMap case "hashtrie" => initHashTrie + case "seq" => initHashTrie } def initHashTrie = { hashtrie = new HashTrie @@ -27,4 +28,4 @@ trait IntInit extends Bench { for (i <- 0 until size) hashmap += ((i, i)) } -}
\ No newline at end of file +} |