From ad63f3630705bd192d9c983399c572b655b3612b Mon Sep 17 00:00:00 2001 From: James Iry Date: Fri, 17 May 2013 10:28:55 -0700 Subject: SI-7150 Replace scala.reflect.internal.WeakHashSet Replaces scala.reflect.internal.WeakHashSet with a version that * extends the mutable.Set trait * doesn't leak WeakReferences * is unit tested --- .../scala/reflect/internal/util/WeakHashSet.scala | 447 +++++++++++++++++++-- 1 file changed, 409 insertions(+), 38 deletions(-) (limited to 'src/reflect') diff --git a/src/reflect/scala/reflect/internal/util/WeakHashSet.scala b/src/reflect/scala/reflect/internal/util/WeakHashSet.scala index ef62fa481b..9b792a3f43 100644 --- a/src/reflect/scala/reflect/internal/util/WeakHashSet.scala +++ b/src/reflect/scala/reflect/internal/util/WeakHashSet.scala @@ -1,59 +1,430 @@ package scala package reflect.internal.util -import scala.collection.mutable +import java.lang.ref.{WeakReference, ReferenceQueue} +import scala.annotation.tailrec import scala.collection.generic.Clearable -import scala.runtime.AbstractFunction1 +import scala.collection.mutable.{Set => mSet} -/** A bare-bones implementation of a mutable `Set` that uses weak references - * to hold the elements. +/** + * A HashSet where the elements are stored weakly. Elements in this set are elligible for GC if no other + * hard references are associated with them. Its primary use case is as a canonical reference + * identity holder (aka "hash-consing") via findEntryOrUpdate * - * This implementation offers only add/remove/test operations, - * therefore it does not fulfill the contract of Scala collection sets. + * This Set implementation cannot hold null. Any attempt to put a null in it will result in a NullPointerException + * + * This set implmeentation is not in general thread safe without external concurrency control. However it behaves + * properly when GC concurrently collects elements in this set. */ -class WeakHashSet[T <: AnyRef] extends AbstractFunction1[T, Boolean] with Clearable { - private val underlying = mutable.HashSet[WeakReferenceWithEquals[T]]() +final class WeakHashSet[A <: AnyRef](val initialCapacity: Int, val loadFactor: Double) extends Set[A] with Function1[A, Boolean] with mSet[A] { + + import WeakHashSet._ + + def this() = this(initialCapacity = WeakHashSet.defaultInitialCapacity, loadFactor = WeakHashSet.defaultLoadFactor) + + type This = WeakHashSet[A] + + /** + * queue of Entries that hold elements scheduled for GC + * the removeStaleEntries() method works through the queue to remeove + * stale entries from the table + */ + private[this] val queue = new ReferenceQueue[A] + + /** + * the number of elements in this set + */ + private[this] var count = 0 + + /** + * from a specified initial capacity compute the capacity we'll use as being the next + * power of two equal to or greater than the specified initial capacity + */ + private def computeCapacity = { + if (initialCapacity < 0) throw new IllegalArgumentException("initial capacity cannot be less than 0"); + var candidate = 1 + while (candidate < initialCapacity) { + candidate *= 2 + } + candidate + } + + /** + * the underlying table of entries which is an array of Entry linked lists + */ + private[this] var table = new Array[Entry[A]](computeCapacity) + + /** + * the limit at which we'll increase the size of the hash table + */ + var threshhold = computeThreshHold + + private[this] def computeThreshHold: Int = (table.size * loadFactor).ceil.toInt - /** Add the given element to this set. */ - def +=(elem: T): this.type = { - underlying += new WeakReferenceWithEquals(elem) - this + /** + * find the bucket associated with an elements's hash code + */ + private[this] def bucketFor(hash: Int): Int = { + // spread the bits around to try to avoid accidental collisions using the + // same algorithm as java.util.HashMap + var h = hash + h ^= h >>> 20 ^ h >>> 12 + h ^= h >>> 7 ^ h >>> 4 + + // this is finding h % table.length, but takes advantage of the + // fact that table length is a power of 2, + // if you don't do bit flipping in your head, if table.length + // is binary 100000.. (with n 0s) then table.length - 1 + // is 1111.. with n 1's. + // In other words this masks on the last n bits in the hash + h & (table.length - 1) } - /** Remove the given element from this set. */ - def -=(elem: T): this.type = { - underlying -= new WeakReferenceWithEquals(elem) - this + /** + * remove a single entry from a linked list in a given bucket + */ + private[this] def remove(bucket: Int, prevEntry: Entry[A], entry: Entry[A]) { + prevEntry match { + case null => table(bucket) = entry.tail + case _ => prevEntry.tail = entry.tail + } + count -= 1 } - /** Does the given element belong to this set? */ - def contains(elem: T): Boolean = - underlying.contains(new WeakReferenceWithEquals(elem)) + /** + * remove entries associated with elements that have been gc'ed + */ + private[this] def removeStaleEntries() { + def poll(): Entry[A] = queue.poll().asInstanceOf[Entry[A]] - /** Does the given element belong to this set? */ - def apply(elem: T): Boolean = contains(elem) + @tailrec + def queueLoop { + val stale = poll() + if (stale != null) { + val bucket = bucketFor(stale.hash) - /** Return the number of elements in this set, including reclaimed elements. */ - def size = underlying.size + @tailrec + def linkedListLoop(prevEntry: Entry[A], entry: Entry[A]): Unit = if (stale eq entry) remove(bucket, prevEntry, entry) + else if (entry != null) linkedListLoop(entry, entry.tail) - /** Remove all elements in this set. */ - def clear() = underlying.clear() -} + linkedListLoop(null, table(bucket)) -/** A WeakReference implementation that implements equals and hashCode by - * delegating to the referent. - */ -class WeakReferenceWithEquals[T <: AnyRef](ref: T) { - def get(): T = underlying.get() + queueLoop + } + } + + queueLoop + } + + /** + * Double the size of the internal table + */ + private[this] def resize() { + val oldTable = table + table = new Array[Entry[A]](oldTable.size * 2) + threshhold = computeThreshHold + + @tailrec + def tableLoop(oldBucket: Int): Unit = if (oldBucket < oldTable.size) { + @tailrec + def linkedListLoop(entry: Entry[A]): Unit = entry match { + case null => () + case _ => { + val bucket = bucketFor(entry.hash) + val oldNext = entry.tail + entry.tail = table(bucket) + table(bucket) = entry + linkedListLoop(oldNext) + } + } + linkedListLoop(oldTable(oldBucket)) + + tableLoop(oldBucket + 1) + } + tableLoop(0) + } + + // from scala.reflect.internal.Set, find an element or null if it isn't contained + override def findEntry(elem: A): A = elem match { + case null => throw new NullPointerException("WeakHashSet cannot hold nulls") + case _ => { + removeStaleEntries() + val hash = elem.hashCode + val bucket = bucketFor(hash) + + @tailrec + def linkedListLoop(entry: Entry[A]): A = entry match { + case null => null.asInstanceOf[A] + case _ => { + val entryElem = entry.get + if (elem == entryElem) entryElem + else linkedListLoop(entry.tail) + } + } + + linkedListLoop(table(bucket)) + } + } + // add an element to this set unless it's already in there and return the element + def findEntryOrUpdate(elem: A): A = elem match { + case null => throw new NullPointerException("WeakHashSet cannot hold nulls") + case _ => { + removeStaleEntries() + val hash = elem.hashCode + val bucket = bucketFor(hash) + val oldHead = table(bucket) + + def add() = { + table(bucket) = new Entry(elem, hash, oldHead, queue) + count += 1 + if (count > threshhold) resize() + elem + } + + @tailrec + def linkedListLoop(entry: Entry[A]): A = entry match { + case null => add() + case _ => { + val entryElem = entry.get + if (elem == entryElem) entryElem + else linkedListLoop(entry.tail) + } + } + + linkedListLoop(oldHead) + } + } + + // add an element to this set unless it's already in there and return this set + override def +(elem: A): this.type = elem match { + case null => throw new NullPointerException("WeakHashSet cannot hold nulls") + case _ => { + removeStaleEntries() + val hash = elem.hashCode + val bucket = bucketFor(hash) + val oldHead = table(bucket) + + def add() { + table(bucket) = new Entry(elem, hash, oldHead, queue) + count += 1 + if (count > threshhold) resize() + } + + @tailrec + def linkedListLoop(entry: Entry[A]): Unit = entry match { + case null => add() + case _ if (elem == entry.get) => () + case _ => linkedListLoop(entry.tail) + } + + linkedListLoop(oldHead) + this + } + } + + def +=(elem: A) = this + elem + + // from scala.reflect.interanl.Set + override def addEntry(x: A) { this += x } + + // remove an element from this set and return this set + override def -(elem: A): this.type = elem match { + case null => this + case _ => { + removeStaleEntries() + val bucket = bucketFor(elem.hashCode) + + + + @tailrec + def linkedListLoop(prevEntry: Entry[A], entry: Entry[A]): Unit = entry match { + case null => () + case _ if (elem == entry.get) => remove(bucket, prevEntry, entry) + case _ => linkedListLoop(entry, entry.tail) + } - override val hashCode = ref.hashCode + linkedListLoop(null, table(bucket)) + this + } + } + + def -=(elem: A) = this - elem + + // empty this set + override def clear(): Unit = { + table = new Array[Entry[A]](table.size) + threshhold = computeThreshHold + count = 0 + + // drain the queue - doesn't do anything because we're throwing away all the values anyway + @tailrec def queueLoop(): Unit = if (queue.poll() != null) queueLoop() + queueLoop() + } + + // true if this set is empty + override def empty: This = new WeakHashSet[A](initialCapacity, loadFactor) + + // the number of elements in this set + override def size: Int = { + removeStaleEntries() + count + } + + override def apply(x: A): Boolean = this contains x + + override def foreach[U](f: A => U): Unit = iterator foreach f + + override def toList(): List[A] = iterator.toList + + // Iterator over all the elements in this set in no particular order + override def iterator: Iterator[A] = { + removeStaleEntries() + + new Iterator[A] { + + /** + * the bucket currently being examined. Initially it's set past the last bucket and will be decremented + */ + private[this] var currentBucket: Int = table.size + + /** + * the entry that was last examined + */ + private[this] var entry: Entry[A] = null + + /** + * the element that will be the result of the next call to next() + */ + private[this] var lookaheadelement: A = null.asInstanceOf[A] + + @tailrec + def hasNext: Boolean = { + while (entry == null && currentBucket > 0) { + currentBucket -= 1 + entry = table(currentBucket) + } + + if (entry == null) false + else { + lookaheadelement = entry.get + if (lookaheadelement == null) { + // element null means the weakref has been cleared since we last did a removeStaleEntries(), move to the next entry + entry = entry.tail + hasNext + } else { + true + } + } + } - override def equals(other: Any): Boolean = other match { - case wf: WeakReferenceWithEquals[_] => - underlying.get() == wf.get() - case _ => - false + def next(): A = if (lookaheadelement == null) + throw new IndexOutOfBoundsException("next on an empty iterator") + else { + val result = lookaheadelement + lookaheadelement = null.asInstanceOf[A] + entry = entry.tail + result + } + } } - private val underlying = new java.lang.ref.WeakReference(ref) + /** + * Diagnostic information about the internals of this set. Not normally + * needed by ordinary code, but may be useful for diagnosing performance problems + */ + private[util] class Diagnostics { + /** + * Verify that the internal structure of this hash set is fully consistent. + * Throws an assertion error on any problem. In order for it to be reliable + * the entries must be stable. If any are garbage collected during validation + * then an assertion may inappropriately fire. + */ + def fullyValidate { + var computedCount = 0 + var bucket = 0 + while (bucket < table.size) { + var entry = table(bucket) + while (entry != null) { + assert(entry.get != null, s"$entry had a null value indicated that gc activity was happening during diagnostic validation or that a null value was inserted") + computedCount += 1 + val cachedHash = entry.hash + val realHash = entry.get.hashCode + assert(cachedHash == realHash, s"for $entry cached hash was $cachedHash but should have been $realHash") + val computedBucket = bucketFor(realHash) + assert(computedBucket == bucket, s"for $entry the computed bucket was $computedBucket but should have been $bucket") + + entry = entry.tail + } + + bucket += 1 + } + + assert(computedCount == count, s"The computed count was $computedCount but should have been $count") + } + + /** + * Produces a diagnostic dump of the table that underlies this hash set. + */ + def dump = table.deep + + /** + * Number of buckets that hold collisions. Useful for diagnosing performance issues. + */ + def collisionBucketsCount: Int = + (table filter (entry => entry != null && entry.tail != null)).size + + /** + * Number of buckets that are occupied in this hash table. + */ + def fullBucketsCount: Int = + (table filter (entry => entry != null)).size + + /** + * Number of buckets in the table + */ + def bucketsCount: Int = table.size + + /** + * Number of buckets that don't hold anything + */ + def emptyBucketsCount = bucketsCount - fullBucketsCount + + /** + * Number of elements that are in collision. Useful for diagnosing performance issues. + */ + def collisionsCount = size - (fullBucketsCount - collisionBucketsCount) + + /** + * A map from a count of elements to the number of buckets with that count + */ + def elementCountDistribution = table map linkedListSize groupBy identity map {case (size, list) => (size, list.size)} + + private def linkedListSize(entry: Entry[A]) = { + var e = entry + var count = 0 + while (e != null) { + count += 1 + e = e.tail + } + count + } + } + + private[util] def diagnostics = new Diagnostics } + +/** + * Companion object for WeakHashSet + */ +object WeakHashSet { + /** + * A single entry in a WeakHashSet. It's a WeakReference plus a cached hash code and + * a link to the next Entry in the same bucket + */ + private class Entry[A](element: A, val hash:Int, var tail: Entry[A], queue: ReferenceQueue[A]) extends WeakReference[A](element, queue) + + val defaultInitialCapacity = 16 + val defaultLoadFactor = .75 + + def apply[A <: AnyRef](initialCapacity: Int = WeakHashSet.defaultInitialCapacity, loadFactor: Double = WeakHashSet.defaultLoadFactor) = new WeakHashSet[A](initialCapacity, defaultLoadFactor) +} \ No newline at end of file -- cgit v1.2.3 From 746d85b2e6f8c8dd64016e0b4c0eafe1b808b147 Mon Sep 17 00:00:00 2001 From: James Iry Date: Fri, 17 May 2013 13:43:13 -0700 Subject: SI-7149 Use a WeakHashSet for type uniqueness Currently type uniqueness is done via a HashSet[Type], but that means the Types live through an entire compile session, even ones that are used once. The result is a huge amount of unnecessarily retained memory. This commit uses a WeakHashSet instead so that Types and their WeakReferences are cleaned up when no longer in use. --- src/reflect/scala/reflect/internal/Types.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/reflect') diff --git a/src/reflect/scala/reflect/internal/Types.scala b/src/reflect/scala/reflect/internal/Types.scala index 00a929003e..a435292185 100644 --- a/src/reflect/scala/reflect/internal/Types.scala +++ b/src/reflect/scala/reflect/internal/Types.scala @@ -3668,13 +3668,13 @@ trait Types // Hash consing -------------------------------------------------------------- private val initialUniquesCapacity = 4096 - private var uniques: util.HashSet[Type] = _ + private var uniques: util.WeakHashSet[Type] = _ private var uniqueRunId = NoRunId protected def unique[T <: Type](tp: T): T = { if (Statistics.canEnable) Statistics.incCounter(rawTypeCount) if (uniqueRunId != currentRunId) { - uniques = util.HashSet[Type]("uniques", initialUniquesCapacity) + uniques = util.WeakHashSet[Type](initialUniquesCapacity) perRunCaches.recordCache(uniques) uniqueRunId = currentRunId } -- cgit v1.2.3 From b7e0fd26db0a0aa2c77b5196a18d99ac91fe072c Mon Sep 17 00:00:00 2001 From: James Iry Date: Fri, 17 May 2013 14:00:57 -0700 Subject: Modify perRunCaches to not leak WeakReferences perRunCaches was using a HashMap of WeakReferences which meant it would accumulate WeakReferences over time. This commit uses a WeakHashSet instead so that the references are cleaned up. --- src/reflect/scala/reflect/internal/SymbolTable.scala | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'src/reflect') diff --git a/src/reflect/scala/reflect/internal/SymbolTable.scala b/src/reflect/scala/reflect/internal/SymbolTable.scala index 7f9811220f..2ae9f81a09 100644 --- a/src/reflect/scala/reflect/internal/SymbolTable.scala +++ b/src/reflect/scala/reflect/internal/SymbolTable.scala @@ -307,27 +307,20 @@ abstract class SymbolTable extends macros.Universe } object perRunCaches { - import java.lang.ref.WeakReference import scala.collection.generic.Clearable // Weak references so the garbage collector will take care of // letting us know when a cache is really out of commission. - private val caches = mutable.HashSet[WeakReference[Clearable]]() + private val caches = WeakHashSet[Clearable]() def recordCache[T <: Clearable](cache: T): T = { - caches += new WeakReference(cache) + caches += cache cache } def clearAll() = { debuglog("Clearing " + caches.size + " caches.") - caches foreach { ref => - val cache = ref.get() - if (cache == null) - caches -= ref - else - cache.clear() - } + caches foreach (_.clear) } def newWeakMap[K, V]() = recordCache(mutable.WeakHashMap[K, V]()) -- cgit v1.2.3