summaryrefslogtreecommitdiff
path: root/src/reflect/scala/reflect/internal/util/WeakHashSet.scala
diff options
context:
space:
mode:
authorJames Iry <james.iry@typesafe.com>2013-05-17 10:28:55 -0700
committerJames Iry <james.iry@typesafe.com>2013-06-04 08:17:41 -0700
commitad63f3630705bd192d9c983399c572b655b3612b (patch)
treec83b611a10fd0d8c48b1ff7acdd86d7a32387d86 /src/reflect/scala/reflect/internal/util/WeakHashSet.scala
parentb88801fac7cb1d7d38d4b3cca4aab4603c48527e (diff)
downloadscala-ad63f3630705bd192d9c983399c572b655b3612b.tar.gz
scala-ad63f3630705bd192d9c983399c572b655b3612b.tar.bz2
scala-ad63f3630705bd192d9c983399c572b655b3612b.zip
SI-7150 Replace scala.reflect.internal.WeakHashSet
Replaces scala.reflect.internal.WeakHashSet with a version that * extends the mutable.Set trait * doesn't leak WeakReferences * is unit tested
Diffstat (limited to 'src/reflect/scala/reflect/internal/util/WeakHashSet.scala')
-rw-r--r--src/reflect/scala/reflect/internal/util/WeakHashSet.scala447
1 files changed, 409 insertions, 38 deletions
diff --git a/src/reflect/scala/reflect/internal/util/WeakHashSet.scala b/src/reflect/scala/reflect/internal/util/WeakHashSet.scala
index ef62fa481b..9b792a3f43 100644
--- a/src/reflect/scala/reflect/internal/util/WeakHashSet.scala
+++ b/src/reflect/scala/reflect/internal/util/WeakHashSet.scala
@@ -1,59 +1,430 @@
package scala
package reflect.internal.util
-import scala.collection.mutable
+import java.lang.ref.{WeakReference, ReferenceQueue}
+import scala.annotation.tailrec
import scala.collection.generic.Clearable
-import scala.runtime.AbstractFunction1
+import scala.collection.mutable.{Set => mSet}
-/** A bare-bones implementation of a mutable `Set` that uses weak references
- * to hold the elements.
+/**
+ * A HashSet where the elements are stored weakly. Elements in this set are elligible for GC if no other
+ * hard references are associated with them. Its primary use case is as a canonical reference
+ * identity holder (aka "hash-consing") via findEntryOrUpdate
*
- * This implementation offers only add/remove/test operations,
- * therefore it does not fulfill the contract of Scala collection sets.
+ * This Set implementation cannot hold null. Any attempt to put a null in it will result in a NullPointerException
+ *
+ * This set implmeentation is not in general thread safe without external concurrency control. However it behaves
+ * properly when GC concurrently collects elements in this set.
*/
-class WeakHashSet[T <: AnyRef] extends AbstractFunction1[T, Boolean] with Clearable {
- private val underlying = mutable.HashSet[WeakReferenceWithEquals[T]]()
+final class WeakHashSet[A <: AnyRef](val initialCapacity: Int, val loadFactor: Double) extends Set[A] with Function1[A, Boolean] with mSet[A] {
+
+ import WeakHashSet._
+
+ def this() = this(initialCapacity = WeakHashSet.defaultInitialCapacity, loadFactor = WeakHashSet.defaultLoadFactor)
+
+ type This = WeakHashSet[A]
+
+ /**
+ * queue of Entries that hold elements scheduled for GC
+ * the removeStaleEntries() method works through the queue to remeove
+ * stale entries from the table
+ */
+ private[this] val queue = new ReferenceQueue[A]
+
+ /**
+ * the number of elements in this set
+ */
+ private[this] var count = 0
+
+ /**
+ * from a specified initial capacity compute the capacity we'll use as being the next
+ * power of two equal to or greater than the specified initial capacity
+ */
+ private def computeCapacity = {
+ if (initialCapacity < 0) throw new IllegalArgumentException("initial capacity cannot be less than 0");
+ var candidate = 1
+ while (candidate < initialCapacity) {
+ candidate *= 2
+ }
+ candidate
+ }
+
+ /**
+ * the underlying table of entries which is an array of Entry linked lists
+ */
+ private[this] var table = new Array[Entry[A]](computeCapacity)
+
+ /**
+ * the limit at which we'll increase the size of the hash table
+ */
+ var threshhold = computeThreshHold
+
+ private[this] def computeThreshHold: Int = (table.size * loadFactor).ceil.toInt
- /** Add the given element to this set. */
- def +=(elem: T): this.type = {
- underlying += new WeakReferenceWithEquals(elem)
- this
+ /**
+ * find the bucket associated with an elements's hash code
+ */
+ private[this] def bucketFor(hash: Int): Int = {
+ // spread the bits around to try to avoid accidental collisions using the
+ // same algorithm as java.util.HashMap
+ var h = hash
+ h ^= h >>> 20 ^ h >>> 12
+ h ^= h >>> 7 ^ h >>> 4
+
+ // this is finding h % table.length, but takes advantage of the
+ // fact that table length is a power of 2,
+ // if you don't do bit flipping in your head, if table.length
+ // is binary 100000.. (with n 0s) then table.length - 1
+ // is 1111.. with n 1's.
+ // In other words this masks on the last n bits in the hash
+ h & (table.length - 1)
}
- /** Remove the given element from this set. */
- def -=(elem: T): this.type = {
- underlying -= new WeakReferenceWithEquals(elem)
- this
+ /**
+ * remove a single entry from a linked list in a given bucket
+ */
+ private[this] def remove(bucket: Int, prevEntry: Entry[A], entry: Entry[A]) {
+ prevEntry match {
+ case null => table(bucket) = entry.tail
+ case _ => prevEntry.tail = entry.tail
+ }
+ count -= 1
}
- /** Does the given element belong to this set? */
- def contains(elem: T): Boolean =
- underlying.contains(new WeakReferenceWithEquals(elem))
+ /**
+ * remove entries associated with elements that have been gc'ed
+ */
+ private[this] def removeStaleEntries() {
+ def poll(): Entry[A] = queue.poll().asInstanceOf[Entry[A]]
- /** Does the given element belong to this set? */
- def apply(elem: T): Boolean = contains(elem)
+ @tailrec
+ def queueLoop {
+ val stale = poll()
+ if (stale != null) {
+ val bucket = bucketFor(stale.hash)
- /** Return the number of elements in this set, including reclaimed elements. */
- def size = underlying.size
+ @tailrec
+ def linkedListLoop(prevEntry: Entry[A], entry: Entry[A]): Unit = if (stale eq entry) remove(bucket, prevEntry, entry)
+ else if (entry != null) linkedListLoop(entry, entry.tail)
- /** Remove all elements in this set. */
- def clear() = underlying.clear()
-}
+ linkedListLoop(null, table(bucket))
-/** A WeakReference implementation that implements equals and hashCode by
- * delegating to the referent.
- */
-class WeakReferenceWithEquals[T <: AnyRef](ref: T) {
- def get(): T = underlying.get()
+ queueLoop
+ }
+ }
+
+ queueLoop
+ }
+
+ /**
+ * Double the size of the internal table
+ */
+ private[this] def resize() {
+ val oldTable = table
+ table = new Array[Entry[A]](oldTable.size * 2)
+ threshhold = computeThreshHold
+
+ @tailrec
+ def tableLoop(oldBucket: Int): Unit = if (oldBucket < oldTable.size) {
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): Unit = entry match {
+ case null => ()
+ case _ => {
+ val bucket = bucketFor(entry.hash)
+ val oldNext = entry.tail
+ entry.tail = table(bucket)
+ table(bucket) = entry
+ linkedListLoop(oldNext)
+ }
+ }
+ linkedListLoop(oldTable(oldBucket))
+
+ tableLoop(oldBucket + 1)
+ }
+ tableLoop(0)
+ }
+
+ // from scala.reflect.internal.Set, find an element or null if it isn't contained
+ override def findEntry(elem: A): A = elem match {
+ case null => throw new NullPointerException("WeakHashSet cannot hold nulls")
+ case _ => {
+ removeStaleEntries()
+ val hash = elem.hashCode
+ val bucket = bucketFor(hash)
+
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): A = entry match {
+ case null => null.asInstanceOf[A]
+ case _ => {
+ val entryElem = entry.get
+ if (elem == entryElem) entryElem
+ else linkedListLoop(entry.tail)
+ }
+ }
+
+ linkedListLoop(table(bucket))
+ }
+ }
+ // add an element to this set unless it's already in there and return the element
+ def findEntryOrUpdate(elem: A): A = elem match {
+ case null => throw new NullPointerException("WeakHashSet cannot hold nulls")
+ case _ => {
+ removeStaleEntries()
+ val hash = elem.hashCode
+ val bucket = bucketFor(hash)
+ val oldHead = table(bucket)
+
+ def add() = {
+ table(bucket) = new Entry(elem, hash, oldHead, queue)
+ count += 1
+ if (count > threshhold) resize()
+ elem
+ }
+
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): A = entry match {
+ case null => add()
+ case _ => {
+ val entryElem = entry.get
+ if (elem == entryElem) entryElem
+ else linkedListLoop(entry.tail)
+ }
+ }
+
+ linkedListLoop(oldHead)
+ }
+ }
+
+ // add an element to this set unless it's already in there and return this set
+ override def +(elem: A): this.type = elem match {
+ case null => throw new NullPointerException("WeakHashSet cannot hold nulls")
+ case _ => {
+ removeStaleEntries()
+ val hash = elem.hashCode
+ val bucket = bucketFor(hash)
+ val oldHead = table(bucket)
+
+ def add() {
+ table(bucket) = new Entry(elem, hash, oldHead, queue)
+ count += 1
+ if (count > threshhold) resize()
+ }
+
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): Unit = entry match {
+ case null => add()
+ case _ if (elem == entry.get) => ()
+ case _ => linkedListLoop(entry.tail)
+ }
+
+ linkedListLoop(oldHead)
+ this
+ }
+ }
+
+ def +=(elem: A) = this + elem
+
+ // from scala.reflect.interanl.Set
+ override def addEntry(x: A) { this += x }
+
+ // remove an element from this set and return this set
+ override def -(elem: A): this.type = elem match {
+ case null => this
+ case _ => {
+ removeStaleEntries()
+ val bucket = bucketFor(elem.hashCode)
+
+
+
+ @tailrec
+ def linkedListLoop(prevEntry: Entry[A], entry: Entry[A]): Unit = entry match {
+ case null => ()
+ case _ if (elem == entry.get) => remove(bucket, prevEntry, entry)
+ case _ => linkedListLoop(entry, entry.tail)
+ }
- override val hashCode = ref.hashCode
+ linkedListLoop(null, table(bucket))
+ this
+ }
+ }
+
+ def -=(elem: A) = this - elem
+
+ // empty this set
+ override def clear(): Unit = {
+ table = new Array[Entry[A]](table.size)
+ threshhold = computeThreshHold
+ count = 0
+
+ // drain the queue - doesn't do anything because we're throwing away all the values anyway
+ @tailrec def queueLoop(): Unit = if (queue.poll() != null) queueLoop()
+ queueLoop()
+ }
+
+ // true if this set is empty
+ override def empty: This = new WeakHashSet[A](initialCapacity, loadFactor)
+
+ // the number of elements in this set
+ override def size: Int = {
+ removeStaleEntries()
+ count
+ }
+
+ override def apply(x: A): Boolean = this contains x
+
+ override def foreach[U](f: A => U): Unit = iterator foreach f
+
+ override def toList(): List[A] = iterator.toList
+
+ // Iterator over all the elements in this set in no particular order
+ override def iterator: Iterator[A] = {
+ removeStaleEntries()
+
+ new Iterator[A] {
+
+ /**
+ * the bucket currently being examined. Initially it's set past the last bucket and will be decremented
+ */
+ private[this] var currentBucket: Int = table.size
+
+ /**
+ * the entry that was last examined
+ */
+ private[this] var entry: Entry[A] = null
+
+ /**
+ * the element that will be the result of the next call to next()
+ */
+ private[this] var lookaheadelement: A = null.asInstanceOf[A]
+
+ @tailrec
+ def hasNext: Boolean = {
+ while (entry == null && currentBucket > 0) {
+ currentBucket -= 1
+ entry = table(currentBucket)
+ }
+
+ if (entry == null) false
+ else {
+ lookaheadelement = entry.get
+ if (lookaheadelement == null) {
+ // element null means the weakref has been cleared since we last did a removeStaleEntries(), move to the next entry
+ entry = entry.tail
+ hasNext
+ } else {
+ true
+ }
+ }
+ }
- override def equals(other: Any): Boolean = other match {
- case wf: WeakReferenceWithEquals[_] =>
- underlying.get() == wf.get()
- case _ =>
- false
+ def next(): A = if (lookaheadelement == null)
+ throw new IndexOutOfBoundsException("next on an empty iterator")
+ else {
+ val result = lookaheadelement
+ lookaheadelement = null.asInstanceOf[A]
+ entry = entry.tail
+ result
+ }
+ }
}
- private val underlying = new java.lang.ref.WeakReference(ref)
+ /**
+ * Diagnostic information about the internals of this set. Not normally
+ * needed by ordinary code, but may be useful for diagnosing performance problems
+ */
+ private[util] class Diagnostics {
+ /**
+ * Verify that the internal structure of this hash set is fully consistent.
+ * Throws an assertion error on any problem. In order for it to be reliable
+ * the entries must be stable. If any are garbage collected during validation
+ * then an assertion may inappropriately fire.
+ */
+ def fullyValidate {
+ var computedCount = 0
+ var bucket = 0
+ while (bucket < table.size) {
+ var entry = table(bucket)
+ while (entry != null) {
+ assert(entry.get != null, s"$entry had a null value indicated that gc activity was happening during diagnostic validation or that a null value was inserted")
+ computedCount += 1
+ val cachedHash = entry.hash
+ val realHash = entry.get.hashCode
+ assert(cachedHash == realHash, s"for $entry cached hash was $cachedHash but should have been $realHash")
+ val computedBucket = bucketFor(realHash)
+ assert(computedBucket == bucket, s"for $entry the computed bucket was $computedBucket but should have been $bucket")
+
+ entry = entry.tail
+ }
+
+ bucket += 1
+ }
+
+ assert(computedCount == count, s"The computed count was $computedCount but should have been $count")
+ }
+
+ /**
+ * Produces a diagnostic dump of the table that underlies this hash set.
+ */
+ def dump = table.deep
+
+ /**
+ * Number of buckets that hold collisions. Useful for diagnosing performance issues.
+ */
+ def collisionBucketsCount: Int =
+ (table filter (entry => entry != null && entry.tail != null)).size
+
+ /**
+ * Number of buckets that are occupied in this hash table.
+ */
+ def fullBucketsCount: Int =
+ (table filter (entry => entry != null)).size
+
+ /**
+ * Number of buckets in the table
+ */
+ def bucketsCount: Int = table.size
+
+ /**
+ * Number of buckets that don't hold anything
+ */
+ def emptyBucketsCount = bucketsCount - fullBucketsCount
+
+ /**
+ * Number of elements that are in collision. Useful for diagnosing performance issues.
+ */
+ def collisionsCount = size - (fullBucketsCount - collisionBucketsCount)
+
+ /**
+ * A map from a count of elements to the number of buckets with that count
+ */
+ def elementCountDistribution = table map linkedListSize groupBy identity map {case (size, list) => (size, list.size)}
+
+ private def linkedListSize(entry: Entry[A]) = {
+ var e = entry
+ var count = 0
+ while (e != null) {
+ count += 1
+ e = e.tail
+ }
+ count
+ }
+ }
+
+ private[util] def diagnostics = new Diagnostics
}
+
+/**
+ * Companion object for WeakHashSet
+ */
+object WeakHashSet {
+ /**
+ * A single entry in a WeakHashSet. It's a WeakReference plus a cached hash code and
+ * a link to the next Entry in the same bucket
+ */
+ private class Entry[A](element: A, val hash:Int, var tail: Entry[A], queue: ReferenceQueue[A]) extends WeakReference[A](element, queue)
+
+ val defaultInitialCapacity = 16
+ val defaultLoadFactor = .75
+
+ def apply[A <: AnyRef](initialCapacity: Int = WeakHashSet.defaultInitialCapacity, loadFactor: Double = WeakHashSet.defaultLoadFactor) = new WeakHashSet[A](initialCapacity, defaultLoadFactor)
+} \ No newline at end of file