diff options
author | RĂ¼diger Klaehn <rklaehn@gmail.com> | 2014-01-15 21:12:23 +0100 |
---|---|---|
committer | RĂ¼diger Klaehn <rklaehn@gmail.com> | 2014-01-15 21:12:23 +0100 |
commit | afcfba02edf342ea020f8c45c3ef83168c45e5ae (patch) | |
tree | 588827e627c0eed46c233f05ca6bc25ee7074757 /src | |
parent | 07e823ad9710734092c81fa7734e38bbd4fe2efa (diff) | |
download | scala-afcfba02edf342ea020f8c45c3ef83168c45e5ae.tar.gz scala-afcfba02edf342ea020f8c45c3ef83168c45e5ae.tar.bz2 scala-afcfba02edf342ea020f8c45c3ef83168c45e5ae.zip |
SI-6196 - Set should implement filter
Implements a version of filter and filterNot that reuses as much as
possible from the existing tree instead of building an entirely new one
like the builder-based filter does. This results in significant performance
improvements on average.
Adds a test of basic correctness of filter and filterNot as well as of the
desirable properties of the new filter implementation.
This is a collaboration between me and @Ichoran
Diffstat (limited to 'src')
-rw-r--r-- | src/library/scala/collection/immutable/HashSet.scala | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/src/library/scala/collection/immutable/HashSet.scala b/src/library/scala/collection/immutable/HashSet.scala index 115be09502..062fe7c158 100644 --- a/src/library/scala/collection/immutable/HashSet.scala +++ b/src/library/scala/collection/immutable/HashSet.scala @@ -38,6 +38,8 @@ class HashSet[A] extends AbstractSet[A] with CustomParallelizable[A, ParHashSet[A]] with Serializable { + import HashSet.{nullToEmpty, bufferSize} + override def companion: GenericCompanion[HashSet] = HashSet //class HashSet[A] extends Set[A] with SetLike[A, HashSet[A]] { @@ -86,6 +88,18 @@ class HashSet[A] extends AbstractSet[A] def - (e: A): HashSet[A] = removed0(e, computeHash(e), 0) + override def filter(p: A => Boolean) = { + val buffer = new Array[HashSet[A]](bufferSize(size)) + nullToEmpty(filter0(p, false, 0, buffer, 0)) + } + + override def filterNot(p: A => Boolean) = { + val buffer = new Array[HashSet[A]](bufferSize(size)) + nullToEmpty(filter0(p, true, 0, buffer, 0)) + } + + protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = null + protected def elemHashCode(key: A) = key.## protected final def improve(hcode: Int) = { @@ -179,6 +193,9 @@ object HashSet extends ImmutableSetFactory[HashSet] { override def removed0(key: A, hash: Int, level: Int): HashSet[A] = if (hash == this.hash && key == this.key) HashSet.empty[A] else this + override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = + if (negate ^ p(key)) this else null + override def iterator: Iterator[A] = Iterator(key) override def foreach[U](f: A => U): Unit = f(key) } @@ -214,6 +231,20 @@ object HashSet extends ImmutableSetFactory[HashSet] { new HashSetCollision1(hash, ks1) } else this + override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = { + val ks1 = if(negate) ks.filterNot(p) else ks.filter(p) + ks1.size match { + case 0 => + null + case 1 => + new HashSet1(ks1.head, hash) + case x if x == ks.size => + this + case _ => + new HashSetCollision1(hash, ks1) + } + } + override def iterator: Iterator[A] = ks.iterator override def foreach[U](f: A => U): Unit = ks.foreach(f) @@ -392,6 +423,52 @@ object HashSet extends ImmutableSetFactory[HashSet] { false } + override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = { + // current offset + var offset = offset0 + // result size + var rs = 0 + // bitmap for kept elems + var kept = 0 + // loop over all elements + var i = 0 + while (i < elems.length) { + val result = elems(i).filter0(p, negate, level + 5, buffer, offset) + if (result ne null) { + buffer(offset) = result + offset += 1 + // add the result size + rs += result.size + // mark the bit i as kept + kept |= (1 << i) + } + i += 1 + } + if (offset == offset0) { + // empty + null + } else if (rs == size0) { + // unchanged + this + } else if (offset == offset0 + 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]]) { + // leaf + buffer(offset0) + } else { + // we have to return a HashTrieSet + val length = offset - offset0 + val elems1 = new Array[HashSet[A]](length) + System.arraycopy(buffer, offset0, elems1, 0, length) + val bitmap1 = if (length == elems.length) { + // we can reuse the original bitmap + bitmap + } else { + // calculate new bitmap by keeping just bits in the kept bitmask + keepBits(bitmap, kept) + } + new HashTrieSet(bitmap1, elems1, rs) + } + } + override def iterator = new TrieIterator[A](elems.asInstanceOf[Array[Iterable[A]]]) { final override def getElem(cc: AnyRef): A = cc.asInstanceOf[HashSet1[A]].key } @@ -405,6 +482,50 @@ object HashSet extends ImmutableSetFactory[HashSet] { } } + /** + * Calculates the maximum buffer size given the maximum possible total size of the trie-based collection + * @param size the maximum size of the collection to be generated + * @return the maximum buffer size + */ + @inline private def bufferSize(size: Int): Int = (size + 6) min (32 * 7) + + /** + * In many internal operations the empty set is represented as null for performance reasons. This method converts + * null to the empty set for use in public methods + */ + @inline private def nullToEmpty[A](s: HashSet[A]): HashSet[A] = if (s eq null) empty[A] else s + + /** + * Utility method to keep a subset of all bits in a given bitmap + * + * Example + * bitmap (binary): 00000001000000010000000100000001 + * keep (binary): 1010 + * result (binary): 00000001000000000000000100000000 + * + * @param bitmap the bitmap + * @param keep a bitmask containing which bits to keep + * @return the original bitmap with all bits where keep is not 1 set to 0 + */ + private def keepBits(bitmap: Int, keep: Int): Int = { + var result = 0 + var current = bitmap + var kept = keep + while (kept != 0) { + // lowest remaining bit in current + val lsb = current ^ (current & (current - 1)) + if ((kept & 1) != 0) { + // mark bit in result bitmap + result |= lsb + } + // clear lowest remaining one bit in abm + current &= ~lsb + // look at the next kept bit + kept >>>= 1 + } + result + } + @SerialVersionUID(2L) private class SerializationProxy[A,B](@transient private var orig: HashSet[A]) extends Serializable { private def writeObject(out: java.io.ObjectOutputStream) { val s = orig.size |