summaryrefslogtreecommitdiff
path: root/src/library
diff options
context:
space:
mode:
authorIchoran <ichoran@gmail.com>2014-01-15 00:23:49 -0800
committerIchoran <ichoran@gmail.com>2014-01-15 00:23:49 -0800
commitc5bb3a1a273806dfac9f874bcb2eda504c95e8ef (patch)
treef1f7320a1ddc72ff224307afe1ca728a57a227f5 /src/library
parentfadf42bc20c8596ffcf421b9e283ebc780ecf602 (diff)
parentb33740f0b46ea8f8f6eb5efa7ecfcf7bcb6b57cd (diff)
downloadscala-c5bb3a1a273806dfac9f874bcb2eda504c95e8ef.tar.gz
scala-c5bb3a1a273806dfac9f874bcb2eda504c95e8ef.tar.bz2
scala-c5bb3a1a273806dfac9f874bcb2eda504c95e8ef.zip
Merge pull request #3315 from rklaehn/issue/7326
Implements specialized subsetOf for HashSet
Diffstat (limited to 'src/library')
-rw-r--r--src/library/scala/collection/immutable/HashSet.scala121
1 files changed, 120 insertions, 1 deletions
diff --git a/src/library/scala/collection/immutable/HashSet.scala b/src/library/scala/collection/immutable/HashSet.scala
index 82a1e471df..115be09502 100644
--- a/src/library/scala/collection/immutable/HashSet.scala
+++ b/src/library/scala/collection/immutable/HashSet.scala
@@ -12,9 +12,9 @@ package scala
package collection
package immutable
-import scala.annotation.unchecked.{ uncheckedVariance => uV }
import generic._
import scala.collection.parallel.immutable.ParHashSet
+import scala.collection.GenSet
/** This class implements immutable sets using a hash trie.
*
@@ -54,6 +54,30 @@ class HashSet[A] extends AbstractSet[A]
def contains(e: A): Boolean = get0(e, computeHash(e), 0)
+ override def subsetOf(that: GenSet[A]) = that match {
+ case that:HashSet[A] =>
+ // call the specialized implementation with a level of 0 since both this and that are top-level hash sets
+ subsetOf0(that, 0)
+ case _ =>
+ // call the generic implementation
+ super.subsetOf(that)
+ }
+
+ /**
+ * A specialized implementation of subsetOf for when both this and that are HashSet[A] and we can take advantage
+ * of the tree structure of both operands and the precalculated hashcodes of the HashSet1 instances.
+ * @param that the other set
+ * @param level the level of this and that hashset
+ * The purpose of level is to keep track of how deep we are in the tree.
+ * We need this information for when we arrive at a leaf and have to call get0 on that
+ * The value of level is 0 for a top-level HashSet and grows in increments of 5
+ * @return true if all elements of this set are contained in that set
+ */
+ protected def subsetOf0(that: HashSet[A], level: Int) = {
+ // The default implementation is for the empty set and returns true because the empty set is a subset of all sets
+ true
+ }
+
override def + (e: A): HashSet[A] = updated0(e, computeHash(e), 0)
override def + (elem1: A, elem2: A, elems: A*): HashSet[A] =
@@ -133,6 +157,14 @@ object HashSet extends ImmutableSetFactory[HashSet] {
override def get0(key: A, hash: Int, level: Int): Boolean =
(hash == this.hash && key == this.key)
+ override def subsetOf0(that: HashSet[A], level: Int) = {
+ // check if that contains this.key
+ // we use get0 with our key and hash at the correct level instead of calling contains,
+ // which would not work since that might not be a top-level HashSet
+ // and in any case would be inefficient because it would require recalculating the hash code
+ that.get0(key, hash, level)
+ }
+
override def updated0(key: A, hash: Int, level: Int): HashSet[A] =
if (hash == this.hash && key == this.key) this
else {
@@ -159,6 +191,14 @@ object HashSet extends ImmutableSetFactory[HashSet] {
override def get0(key: A, hash: Int, level: Int): Boolean =
if (hash == this.hash) ks.contains(key) else false
+ override def subsetOf0(that: HashSet[A], level: Int) = {
+ // we have to check each element
+ // we use get0 with our hash at the correct level instead of calling contains,
+ // which would not work since that might not be a top-level HashSet
+ // and in any case would be inefficient because it would require recalculating the hash code
+ ks.forall(key => that.get0(key, hash, level))
+ }
+
override def updated0(key: A, hash: Int, level: Int): HashSet[A] =
if (hash == this.hash) new HashSetCollision1(hash, ks + key)
else makeHashTrieSet(this.hash, this, hash, new HashSet1(key, hash), level)
@@ -194,6 +234,42 @@ object HashSet extends ImmutableSetFactory[HashSet] {
}
+ /**
+ * A branch node of the HashTrieSet with at least one and up to 32 children.
+ *
+ * @param bitmap encodes which element corresponds to which child
+ * @param elems the up to 32 children of this node.
+ * the number of children must be identical to the number of 1 bits in bitmap
+ * @param size0 the total number of elements. This is stored just for performance reasons.
+ * @tparam A the type of the elements contained in this hash set.
+ *
+ * How levels work:
+ *
+ * When looking up or adding elements, the part of the hashcode that is used to address the children array depends
+ * on how deep we are in the tree. This is accomplished by having a level parameter in all internal methods
+ * that starts at 0 and increases by 5 (32 = 2^5) every time we go deeper into the tree.
+ *
+ * hashcode (binary): 00000000000000000000000000000000
+ * level=0 (depth=0) ^^^^^
+ * level=5 (depth=1) ^^^^^
+ * level=10 (depth=2) ^^^^^
+ * ...
+ *
+ * Be careful: a non-toplevel HashTrieSet is not a self-contained set, so e.g. calling contains on it will not work!
+ * It relies on its depth in the Trie for which part of a hash to use to address the children, but this information
+ * (the level) is not stored due to storage efficiency reasons but has to be passed explicitly!
+ *
+ * How bitmap and elems correspond:
+ *
+ * A naive implementation of a HashTrieSet would always have an array of size 32 for children and leave the unused
+ * children empty (null). But that would be very wasteful regarding memory. Instead, only non-empty children are
+ * stored in elems, and the bitmap is used to encode which elem corresponds to which child bucket. The lowest 1 bit
+ * corresponds to the first element, the second-lowest to the second, etc.
+ *
+ * bitmap (binary): 00010000000000000000100000000000
+ * elems: [a,b]
+ * children: ---b----------------a-----------
+ */
class HashTrieSet[A](private val bitmap: Int, private[collection] val elems: Array[HashSet[A]], private val size0: Int)
extends HashSet[A] {
assert(Integer.bitCount(bitmap) == elems.length)
@@ -273,6 +349,49 @@ object HashSet extends ImmutableSetFactory[HashSet] {
}
}
+ override def subsetOf0(that: HashSet[A], level: Int): Boolean = if (that eq this) true else that match {
+ case that: HashTrieSet[A] if this.size0 <= that.size0 =>
+ // create local mutable copies of members
+ var abm = this.bitmap
+ val a = this.elems
+ var ai = 0
+ val b = that.elems
+ var bbm = that.bitmap
+ var bi = 0
+ if ((abm & bbm) == abm) {
+ // I tried rewriting this using tail recursion, but the generated java byte code was less than optimal
+ while(abm!=0) {
+ // highest remaining bit in abm
+ val alsb = abm ^ (abm & (abm - 1))
+ // highest remaining bit in bbm
+ val blsb = bbm ^ (bbm & (bbm - 1))
+ // if both trees have a bit set at the same position, we need to check the subtrees
+ if (alsb == blsb) {
+ // we are doing a comparison of a child of this with a child of that,
+ // so we have to increase the level by 5 to keep track of how deep we are in the tree
+ if (!a(ai).subsetOf0(b(bi), level + 5))
+ return false
+ // clear lowest remaining one bit in abm and increase the a index
+ abm &= ~alsb; ai += 1
+ }
+ // clear lowermost remaining one bit in bbm and increase the b index
+ // we must do this in any case
+ bbm &= ~blsb; bi += 1
+ }
+ true
+ } else {
+ // the bitmap of this contains more one bits than the bitmap of that,
+ // so this can not possibly be a subset of that
+ false
+ }
+ case _ =>
+ // if the other set is a HashTrieSet but has less elements than this, it can not be a subset
+ // if the other set is a HashSet1, we can not be a subset of it because we are a HashTrieSet with at least two children (see assertion)
+ // if the other set is a HashSetCollision1, we can not be a subset of it because we are a HashTrieSet with at least two different hash codes
+ // if the other set is the empty set, we are not a subset of it because we are not empty
+ false
+ }
+
override def iterator = new TrieIterator[A](elems.asInstanceOf[Array[Iterable[A]]]) {
final override def getElem(cc: AnyRef): A = cc.asInstanceOf[HashSet1[A]].key
}