diff options
author | Pavel Pavlov <pavel.e.pavlov@gmail.com> | 2012-08-27 00:12:07 +0700 |
---|---|---|
committer | Pavel Pavlov <pavel.e.pavlov@gmail.com> | 2012-08-27 19:35:53 +0700 |
commit | 3cd8eb053afcb3592770437112655023ede17505 (patch) | |
tree | e729437cdee551244dd339872f50937c67aaa05c /src/library/scala/collection/mutable/HashTable.scala | |
parent | a23edefac652e3be1474fceb3ee15d7eaecf1359 (diff) | |
download | scala-3cd8eb053afcb3592770437112655023ede17505.tar.gz scala-3cd8eb053afcb3592770437112655023ede17505.tar.bz2 scala-3cd8eb053afcb3592770437112655023ede17505.zip |
SI-5767 fix + small HashSet/HashMap fixes
- `LinkedHashSet` implementation moved from `FlatHashTable` to `HashTable`
- `remove` time reduced from O(n) to O(1)
- `diff` time reduced from O(n^2) to O(n)
- A bit of refactoring in `HashTable` serialization code
- Putting an element into hash map now avoids double hash code/hash index
calculation (see `HashTable#findOrAddEntry`)
- bugfix: compiler/LambdaLift occasionally breaks LinkedHashSet integrity
Diffstat (limited to 'src/library/scala/collection/mutable/HashTable.scala')
-rw-r--r-- | src/library/scala/collection/mutable/HashTable.scala | 56 |
1 files changed, 39 insertions, 17 deletions
diff --git a/src/library/scala/collection/mutable/HashTable.scala b/src/library/scala/collection/mutable/HashTable.scala index 97e794f06e..ff66df624d 100644 --- a/src/library/scala/collection/mutable/HashTable.scala +++ b/src/library/scala/collection/mutable/HashTable.scala @@ -32,6 +32,9 @@ package mutable * @tparam A type of the elements contained in this hash table. */ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashUtils[A] { + // Replacing Entry type parameter by abstract type member here allows to not expose to public + // implementation-specific entry classes such as `DefaultEntry` or `LinkedEntry`. + // However, I'm afraid it's too late now for such breaking change. import HashTable._ @transient protected var _loadFactor = defaultLoadFactor @@ -75,11 +78,10 @@ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashU } /** - * Initializes the collection from the input stream. `f` will be called for each key/value pair - * read from the input stream in the order determined by the stream. This is useful for - * structures where iteration order is important (e.g. LinkedHashMap). + * Initializes the collection from the input stream. `readEntry` will be called for each + * entry to be read from the input stream. */ - private[collection] def init[B](in: java.io.ObjectInputStream, f: (A, B) => Entry) { + private[collection] def init(in: java.io.ObjectInputStream, readEntry: => Entry) { in.defaultReadObject _loadFactor = in.readInt() @@ -100,35 +102,34 @@ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashU var index = 0 while (index < size) { - addEntry(f(in.readObject().asInstanceOf[A], in.readObject().asInstanceOf[B])) + addEntry(readEntry) index += 1 } } /** * Serializes the collection to the output stream by saving the load factor, collection - * size, collection keys and collection values. `value` is responsible for providing a value - * from an entry. + * size and collection entries. `writeEntry` is responsible for writing an entry to the stream. * - * `foreach` determines the order in which the key/value pairs are saved to the stream. To + * `foreachEntry` determines the order in which the key/value pairs are saved to the stream. To * deserialize, `init` should be used. */ - private[collection] def serializeTo[B](out: java.io.ObjectOutputStream, value: Entry => B) { + private[collection] def serializeTo(out: java.io.ObjectOutputStream, writeEntry: Entry => Unit) { out.defaultWriteObject out.writeInt(_loadFactor) out.writeInt(tableSize) out.writeInt(seedvalue) out.writeBoolean(isSizeMapDefined) - foreachEntry { entry => - out.writeObject(entry.key) - out.writeObject(value(entry)) - } + + foreachEntry(writeEntry) } /** Find entry with given key in table, null if not found. */ - protected def findEntry(key: A): Entry = { - val h = index(elemHashCode(key)) + protected def findEntry(key: A): Entry = + findEntry0(key, index(elemHashCode(key))) + + private[this] def findEntry0(key: A, h: Int): Entry = { var e = table(h).asInstanceOf[Entry] while (e != null && !elemEquals(e.key, key)) e = e.next e @@ -138,7 +139,10 @@ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashU * pre: no entry with same key exists */ protected def addEntry(e: Entry) { - val h = index(elemHashCode(e.key)) + addEntry0(e, index(elemHashCode(e.key))) + } + + private[this] def addEntry0(e: Entry, h: Int) { e.next = table(h).asInstanceOf[Entry] table(h) = e tableSize = tableSize + 1 @@ -147,6 +151,24 @@ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashU resize(2 * table.length) } + /** Find entry with given key in table, or add new one if not found. + * May be somewhat faster then `findEntry`/`addEntry` pair as it + * computes entry's hash index only once. + * Returns entry found in table or null. + * Method `createNewEntry` must be implemented in any subclass/subtrait who calls this. + */ + protected def findOrAddEntry[B](key: A, value: B): Entry = { + val h = index(elemHashCode(key)) + val e = findEntry0(key, h) + if (e ne null) e else { addEntry0(createNewEntry(key, value), h); null } + } + + /** Creates new entry to be immediately inserted into the hashtable. + * This method must be implemented for any class who calls `findOrAddEntry`. + * Ideally, it should be abstract, but this will break source compatibility. + */ + protected def createNewEntry[B](key: A, value: B): Entry = ??? + /** Remove entry from table if present. */ protected def removeEntry(key: A) : Entry = { @@ -195,7 +217,7 @@ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashU } /** Avoid iterator for a 2x faster traversal. */ - protected def foreachEntry[C](f: Entry => C) { + protected def foreachEntry[U](f: Entry => U) { val iterTable = table var idx = lastPopulatedIndex var es = iterTable(idx) |