From 3db50837cca2f92aee48fe44bc38106dbc5f4c59 Mon Sep 17 00:00:00 2001 From: Rex Kerr Date: Sat, 29 Aug 2015 18:10:26 -0700 Subject: SI-9347 Efficient head/tail, if possible, for immutable maps & sets Most immutable collections, including sets and maps, have a better-than-O(n) method for removing an element. In those cases, tail and possibly head were overridden so the head/tail pattern can be used with less of a performance penalty. Speed improvements on head/tail pattern are (for sets/maps of size 1024, unless otherwise specified): ``` BitSet 190x HashSet 250x Set 400x Set2 9x Set4 12x HashMap 430x ListMap 2500x // size 128 Map 430x ``` Note: `ListMap` is actually `init`/`last` because it's maintained in reverse order. Altered ListMap docs to explain that reverse traversal is the fast way to do it. All tested sets/maps that were already fast are still fast. Test code is reproduced below, except it does ListSet with head/tail which doesn't show the improvement: ```scala object BenchTailSetMap { val th = new ichi.bench.Thyme val standard = 1 to 1024 val sets = Map[String, Set[Int]]( "Set" -> (Set.empty[Int] ++ standard), "Set4"-> Set(4, 7, 2, 1), "Set2"-> Set(3, 4), "HashSet" -> (collection.immutable.HashSet.empty[Int] ++ standard), "BitSet" -> (collection.immutable.BitSet.empty ++ standard), "SortedSet" -> (collection.immutable.SortedSet.empty[Int] ++ standard), "ListSet" -> (collection.immutable.ListSet.empty[Int] ++ standard) ) val pairs = standard.map(i => i -> i.toString) // ListMap implementation is HORRIBLE, O(n^3) tail! Cut down size. val maps = Map[String, Map[Int, String]]( "Map" -> (Map.empty[Int, String] ++ pairs), "HashMap" -> (collection.immutable.HashMap.empty[Int, String] ++ pairs), "SortedMap" -> (collection.immutable.SortedMap.empty[Int, String] ++ pairs), "ListMap" -> (collection.immutable.ListMap.empty[Int, String] ++ pairs.take(128)) ) def hts(s: Set[Int]) = { var si = s var x = 0 while (si.nonEmpty) { x += si.head si = si.tail } x } def htm(m: Map[Int, String]) = { var mi = m var x = 0 while (mi.nonEmpty) { x += mi.head._2.length mi = mi.tail } x } def run() { sets.toList.sortBy(_._1).foreach{ case (name, s) => th.pbench(hts(s), s.size, name) } maps.toList.sortBy(_._1).foreach{ case (name, m) => th.pbench(htm(m), m.size, name) } } } ``` --- src/library/scala/collection/BitSetLike.scala | 21 ++++++++++++++++ .../scala/collection/immutable/BitSet.scala | 29 ++++++++++++++++++++++ .../scala/collection/immutable/HashMap.scala | 7 +++++- .../scala/collection/immutable/HashSet.scala | 7 +++++- .../scala/collection/immutable/ListMap.scala | 9 +++++++ src/library/scala/collection/immutable/Set.scala | 9 ++++++- 6 files changed, 79 insertions(+), 3 deletions(-) (limited to 'src/library') diff --git a/src/library/scala/collection/BitSetLike.scala b/src/library/scala/collection/BitSetLike.scala index 29369447d1..209b00ebf9 100644 --- a/src/library/scala/collection/BitSetLike.scala +++ b/src/library/scala/collection/BitSetLike.scala @@ -204,6 +204,27 @@ trait BitSetLike[+This <: BitSetLike[This] with SortedSet[Int]] extends SortedSe def subsetOf(other: BitSet): Boolean = (0 until nwords) forall (idx => (this.word(idx) & ~ other.word(idx)) == 0L) + override def head: Int = { + val n = nwords + var i = 0 + while (i < n) { + val wi = word(i) + if (wi != 0L) return WordLength*i + java.lang.Long.numberOfTrailingZeros(wi) + i += 1 + } + throw new NoSuchElementException("Empty BitSet") + } + + override def last: Int = { + var i = nwords - 1 + while (i >= 0) { + val wi = word(i) + if (wi != 0L) return WordLength*i + 63 - java.lang.Long.numberOfLeadingZeros(wi) + i += 1 + } + throw new NoSuchElementException("Empty BitSet") + } + override def addString(sb: StringBuilder, start: String, sep: String, end: String) = { sb append start var pre = "" diff --git a/src/library/scala/collection/immutable/BitSet.scala b/src/library/scala/collection/immutable/BitSet.scala index 846bc22182..6bb1f116fe 100644 --- a/src/library/scala/collection/immutable/BitSet.scala +++ b/src/library/scala/collection/immutable/BitSet.scala @@ -103,6 +103,7 @@ object BitSet extends BitSetFactory[BitSet] { else new BitSetN(elems) } + @SerialVersionUID(2260107458435649300L) class BitSet1(val elems: Long) extends BitSet { protected def nwords = 1 protected def word(idx: Int) = if (idx == 0) elems else 0L @@ -110,6 +111,12 @@ object BitSet extends BitSetFactory[BitSet] { if (idx == 0) new BitSet1(w) else if (idx == 1) new BitSet2(elems, w) else fromBitMaskNoCopy(updateArray(Array(elems), idx, w)) + override def head: Int = + if (elems == 0L) throw new NoSuchElementException("Empty BitSet") + else java.lang.Long.numberOfTrailingZeros(elems) + override def tail: BitSet = + if (elems == 0L) throw new NoSuchElementException("Empty BitSet") + else new BitSet1(elems - java.lang.Long.lowestOneBit(elems)) } class BitSet2(val elems0: Long, elems1: Long) extends BitSet { @@ -119,6 +126,18 @@ object BitSet extends BitSetFactory[BitSet] { if (idx == 0) new BitSet2(w, elems1) else if (idx == 1) new BitSet2(elems0, w) else fromBitMaskNoCopy(updateArray(Array(elems0, elems1), idx, w)) + override def head: Int = + if (elems0 == 0L) { + if (elems1 == 0) throw new NoSuchElementException("Empty BitSet") + 64 + java.lang.Long.numberOfTrailingZeros(elems1) + } + else java.lang.Long.numberOfTrailingZeros(elems0) + override def tail: BitSet = + if (elems0 == 0L) { + if (elems1 == 0L) throw new NoSuchElementException("Empty BitSet") + new BitSet2(elems0, elems1 - java.lang.Long.lowestOneBit(elems1)) + } + else new BitSet2(elems0 - java.lang.Long.lowestOneBit(elems0), elems1) } /** The implementing class for bit sets with elements >= 128 (exceeding @@ -131,5 +150,15 @@ object BitSet extends BitSetFactory[BitSet] { protected def nwords = elems.length protected def word(idx: Int) = if (idx < nwords) elems(idx) else 0L protected def updateWord(idx: Int, w: Long): BitSet = fromBitMaskNoCopy(updateArray(elems, idx, w)) + override def tail: BitSet = { + val n = nwords + var i = 0 + while (i < n) { + val wi = word(i) + if (wi != 0L) return fromBitMaskNoCopy(updateArray(elems, i, wi - java.lang.Long.lowestOneBit(wi))) + i += 1 + } + throw new NoSuchElementException("Empty BitSet") + } } } diff --git a/src/library/scala/collection/immutable/HashMap.scala b/src/library/scala/collection/immutable/HashMap.scala index 92d915fe8b..3c41e1ba11 100644 --- a/src/library/scala/collection/immutable/HashMap.scala +++ b/src/library/scala/collection/immutable/HashMap.scala @@ -65,6 +65,8 @@ class HashMap[A, +B] extends AbstractMap[A, B] def - (key: A): HashMap[A, B] = removed0(key, computeHash(key), 0) + override def tail: HashMap[A, B] = this - head._1 + override def filter(p: ((A, B)) => Boolean) = { val buffer = new Array[HashMap[A, B]](bufferSize(size)) nullToEmpty(filter0(p, false, 0, buffer, 0)) @@ -156,7 +158,10 @@ object HashMap extends ImmutableMapFactory[HashMap] with BitOperations.Int { implicit def canBuildFrom[A, B]: CanBuildFrom[Coll, (A, B), HashMap[A, B]] = new MapCanBuildFrom[A, B] def empty[A, B]: HashMap[A, B] = EmptyHashMap.asInstanceOf[HashMap[A, B]] - private object EmptyHashMap extends HashMap[Any, Nothing] { } + private object EmptyHashMap extends HashMap[Any, Nothing] { + override def head: (Any, Nothing) = throw new NoSuchElementException("Empty Map") + override def tail: HashMap[Any, Nothing] = throw new NoSuchElementException("Empty Map") + } // utility method to create a HashTrieMap from two leaf HashMaps (HashMap1 or HashMapCollision1) with non-colliding hash code) private def makeHashTrieMap[A, B](hash0:Int, elem0:HashMap[A, B], hash1:Int, elem1:HashMap[A, B], level:Int, size:Int) : HashTrieMap[A, B] = { diff --git a/src/library/scala/collection/immutable/HashSet.scala b/src/library/scala/collection/immutable/HashSet.scala index 07758bf5a2..2d8cc0b386 100644 --- a/src/library/scala/collection/immutable/HashSet.scala +++ b/src/library/scala/collection/immutable/HashSet.scala @@ -162,6 +162,8 @@ class HashSet[A] extends AbstractSet[A] def - (e: A): HashSet[A] = nullToEmpty(removed0(e, computeHash(e), 0)) + override def tail: HashSet[A] = this - head + override def filter(p: A => Boolean) = { val buffer = new Array[HashSet[A]](bufferSize(size)) nullToEmpty(filter0(p, false, 0, buffer, 0)) @@ -213,7 +215,10 @@ object HashSet extends ImmutableSetFactory[HashSet] { /** $setCanBuildFromInfo */ implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, HashSet[A]] = setCanBuildFrom[A] - private object EmptyHashSet extends HashSet[Any] { } + private object EmptyHashSet extends HashSet[Any] { + override def head: Any = throw new NoSuchElementException("Empty Set") + override def tail: HashSet[Any] = throw new NoSuchElementException("Empty Set") + } private[collection] def emptyInstance: HashSet[Any] = EmptyHashSet // utility method to create a HashTrieSet from two leaf HashSets (HashSet1 or HashSetCollision1) with non-colliding hash code) diff --git a/src/library/scala/collection/immutable/ListMap.scala b/src/library/scala/collection/immutable/ListMap.scala index f30c0cbf8e..5bbcf44201 100644 --- a/src/library/scala/collection/immutable/ListMap.scala +++ b/src/library/scala/collection/immutable/ListMap.scala @@ -20,6 +20,9 @@ import scala.annotation.tailrec * @see [[http://docs.scala-lang.org/overviews/collections/concrete-immutable-collection-classes.html#list_maps "Scala's Collection Library overview"]] * section on `List Maps` for more information. * + * Note that `ListMap` is built in reverse order to canonical traversal order (traversal order is oldest first). + * Thus, `head` and `tail` are O(n). To rapidly partition a `ListMap` into elements, use `last` and `init` instead. These are O(1). + * * @define Coll immutable.ListMap * @define coll immutable list map */ @@ -33,6 +36,8 @@ object ListMap extends ImmutableMapFactory[ListMap] { private object EmptyListMap extends ListMap[Any, Nothing] { override def apply(key: Any) = throw new NoSuchElementException("key not found: " + key) override def contains(key: Any) = false + override def last: (Any, Nothing) = throw new NoSuchElementException("Empty ListMap") + override def init: ListMap[Any, Nothing] = throw new NoSuchElementException("Empty ListMap") } } @@ -216,5 +221,9 @@ extends AbstractMap[A, B] remove0(k, cur.next, cur::acc) override protected def next: ListMap[A, B1] = ListMap.this + + override def last: (A, B1) = (key, value) + + override def init: ListMap[A, B1] = next } } diff --git a/src/library/scala/collection/immutable/Set.scala b/src/library/scala/collection/immutable/Set.scala index 031e5248c1..3a8ee8b0be 100644 --- a/src/library/scala/collection/immutable/Set.scala +++ b/src/library/scala/collection/immutable/Set.scala @@ -103,10 +103,11 @@ object Set extends ImmutableSetFactory[Set] { if (p(elem1)) Some(elem1) else None } + override def head: A = elem1 + override def tail: Set[A] = Set.empty // Why is Set1 non-final? Need to fix that! @deprecatedOverriding("This immutable set should do nothing on toSet but cast itself to a Set with a wider element type.", "2.11.8") override def toSet[B >: A]: Set[B] = this.asInstanceOf[Set1[B]] - } /** An optimized representation for immutable sets of size 2 */ @@ -138,6 +139,8 @@ object Set extends ImmutableSetFactory[Set] { else if (p(elem2)) Some(elem2) else None } + override def head: A = elem1 + override def tail: Set[A] = new Set1(elem2) // Why is Set2 non-final? Need to fix that! @deprecatedOverriding("This immutable set should do nothing on toSet but cast itself to a Set with a wider element type.", "2.11.8") override def toSet[B >: A]: Set[B] = this.asInstanceOf[Set2[B]] @@ -174,6 +177,8 @@ object Set extends ImmutableSetFactory[Set] { else if (p(elem3)) Some(elem3) else None } + override def head: A = elem1 + override def tail: Set[A] = new Set2(elem2, elem3) // Why is Set3 non-final? Need to fix that! @deprecatedOverriding("This immutable set should do nothing on toSet but cast itself to a Set with a wider element type.", "2.11.8") override def toSet[B >: A]: Set[B] = this.asInstanceOf[Set3[B]] @@ -212,6 +217,8 @@ object Set extends ImmutableSetFactory[Set] { else if (p(elem4)) Some(elem4) else None } + override def head: A = elem1 + override def tail: Set[A] = new Set3(elem2, elem3, elem4) // Why is Set4 non-final? Need to fix that! @deprecatedOverriding("This immutable set should do nothing on toSet but cast itself to a Set with a wider element type.", "2.11.8") override def toSet[B >: A]: Set[B] = this.asInstanceOf[Set4[B]] -- cgit v1.2.3