From 05aedd936e7e7bcf0fa2443abd58b39732f173a9 Mon Sep 17 00:00:00 2001 From: Rex Kerr Date: Sun, 10 Nov 2013 15:05:58 -0800 Subject: New mutable hash map with Long keys: partially solves SI-5263 and is relevant to SI-6825. The hash map is based on an open addressing scheme that provides dramatically faster (mostly due to specialization) get and contains operations than either the standard Java HashMap or any of the existing Scala hash maps. It doesn't work well above 500,000,000 elements as the arrays cannot scale past 2^30 elements. Maps are not faster in general due to the lack of specialization of Function1[Long, V]. Revisions made sure that all return types in the public API are specified. Also switched to a leading-zeros method of calculating the initial mask (to save a few ns), and used an occasionally-more-efficient version of seekEntryOrOpen. Also edited out specific performance numbers and moved constants to companion. --- src/library/scala/collection/mutable/LongMap.scala | 558 +++++++++++++++++++++ test/files/run/mutable-longmap.scala | 79 +++ 2 files changed, 637 insertions(+) create mode 100644 src/library/scala/collection/mutable/LongMap.scala create mode 100644 test/files/run/mutable-longmap.scala diff --git a/src/library/scala/collection/mutable/LongMap.scala b/src/library/scala/collection/mutable/LongMap.scala new file mode 100644 index 0000000000..81c381279f --- /dev/null +++ b/src/library/scala/collection/mutable/LongMap.scala @@ -0,0 +1,558 @@ +package scala +package collection +package mutable + +import generic.CanBuildFrom + +/** This class implements mutable maps with `Long` keys based on a hash table with open addressing. + * + * Basic map operations on single entries, including `contains` and `get`, + * are typically substantially faster with `LongMap` than [[HashMap]]. Methods + * that act on the whole map, including `foreach` and `map` are not in + * general expected to be faster than with a generic map, save for those + * that take particular advantage of the internal structure of the map: + * `foreachKey`, `foreachValue`, `mapValuesNow`, and `transformValues`. + * + * Maps with open addressing may become less efficient at lookup after + * repeated addition/removal of elements. Although `LongMap` makes a + * decent attempt to remain efficient regardless, calling `repack` + * on a map that will no longer have elements removed but will be + * used heavily may save both time and storage space. + * + * This map is not indended to contain more than 2^29 entries (approximately + * 500 million). The maximum capacity is 2^30, but performance will degrade + * rapidly as 2^30 is approached. + * + */ +final class LongMap[V] private[collection] (defaultEntry: Long => V, initialBufferSize: Int, initBlank: Boolean) +extends AbstractMap[Long, V] + with Map[Long, V] + with MapLike[Long, V, LongMap[V]] + with Serializable +{ + import LongMap._ + + def this() = this(LongMap.exceptionDefault, 16, true) + + /** Creates a new `LongMap` that returns default values according to a supplied key-value mapping. */ + def this(defaultEntry: Long => V) = this(defaultEntry, 16, true) + + /** Creates a new `LongMap` with an initial buffer of specified size. + * + * A LongMap can typically contain half as many elements as its buffer size + * before it requires resizing. + */ + def this(initialBufferSize: Int) = this(LongMap.exceptionDefault, initialBufferSize, true) + + /** Creates a new `LongMap` with specified default values and initial buffer size. */ + def this(defaultEntry: Long => V, initialBufferSize: Int) = this(defaultEntry, initialBufferSize, true) + + private[this] var mask = 0 + private[this] var extraKeys: Int = 0 + private[this] var zeroValue: AnyRef = null + private[this] var minValue: AnyRef = null + private[this] var _size = 0 + private[this] var _vacant = 0 + private[this] var _keys: Array[Long] = null + private[this] var _values: Array[AnyRef] = null + + if (initBlank) defaultInitialize(initialBufferSize) + + private[this] def defaultInitialize(n: Int) = { + mask = + if (n<0) 0x7 + else (((1 << (32 - java.lang.Integer.numberOfLeadingZeros(n-1))) - 1) & 0x3FFFFFFF) | 0x7 + _keys = new Array[Long](mask+1) + _values = new Array[AnyRef](mask+1) + } + + private[collection] def initializeTo( + m: Int, ek: Int, zv: AnyRef, mv: AnyRef, sz: Int, vc: Int, kz: Array[Long], vz: Array[AnyRef] + ) { + mask = m; extraKeys = ek; zeroValue = zv; minValue = mv; _size = sz; _vacant = vc; _keys = kz; _values = vz + } + + override def size: Int = _size + (extraKeys+1)/2 + override def empty: LongMap[V] = new LongMap() + + private def imbalanced: Boolean = + (_size + _vacant) > 0.5*mask || _vacant > _size + + private def toIndex(k: Long): Int = { + // Part of the MurmurHash3 32 bit finalizer + val h = ((k ^ (k >>> 32)) & 0xFFFFFFFFL).toInt + var x = (h ^ (h >>> 16)) * 0x85EBCA6B + (x ^ (x >>> 13)) & mask + } + + private def seekEmpty(k: Long): Int = { + var e = toIndex(k) + var x = 0 + while (_keys(e) != 0) { x += 1; e = (e + 2*(x+1)*x - 3) & mask } + e + } + + private def seekEntry(k: Long): Int = { + var e = toIndex(k) + var x = 0 + var q = 0L + while ({ q = _keys(e); if (q==k) return e; q != 0}) { x += 1; e = (e + 2*(x+1)*x - 3) & mask } + e | MissingBit + } + + private def seekEntryOrOpen(k: Long): Int = { + var e = toIndex(k) + var x = 0 + var q = 0L + while ({ q = _keys(e); if (q==k) return e; q+q != 0}) { + x += 1 + e = (e + 2*(x+1)*x - 3) & mask + } + if (q == 0) return e | MissingBit + val o = e | MissVacant + while ({ q = _keys(e); if (q==k) return e; q != 0}) { + x += 1 + e = (e + 2*(x+1)*x - 3) & mask + } + o + } + + override def contains(key: Long): Boolean = { + if (key == -key) (((key>>>63).toInt+1) & extraKeys) != 0 + else seekEntry(key) >= 0 + } + + override def get(key: Long): Option[V] = { + if (key == -key) { + if ((((key>>>63).toInt+1) & extraKeys) == 0) None + else if (key == 0) Some(zeroValue.asInstanceOf[V]) + else Some(minValue.asInstanceOf[V]) + } + else { + val i = seekEntry(key) + if (i < 0) None else Some(_values(i).asInstanceOf[V]) + } + } + + override def getOrElse[V1 >: V](key: Long, default: => V1): V1 = { + if (key == -key) { + if ((((key>>>63).toInt+1) & extraKeys) == 0) default + else if (key == 0) zeroValue.asInstanceOf[V1] + else minValue.asInstanceOf[V1] + } + else { + val i = seekEntry(key) + if (i < 0) default else _values(i).asInstanceOf[V1] + } + } + + override def getOrElseUpdate(key: Long, defaultValue: => V): V = { + if (key == -key) { + val kbits = (key>>>63).toInt + 1 + if ((kbits & extraKeys) == 0) { + val value = defaultValue + extraKeys |= kbits + if (key == 0) zeroValue = value.asInstanceOf[AnyRef] + else minValue = value.asInstanceOf[AnyRef] + value + } + else if (key == 0) zeroValue.asInstanceOf[V] + else minValue.asInstanceOf[V] + } + else { + val i = seekEntryOrOpen(key) + if (i < 0) { + val value = defaultValue + _size += 1 + val j = i & IndexMask + _keys(j) = key + _values(j) = value.asInstanceOf[AnyRef] + if ((i & VacantBit) != 0) _vacant -= 1 + else if (imbalanced) repack() + value + } + else _values(i).asInstanceOf[V] + } + } + + /** Retrieves the value associated with a key, or the default for that type if none exists + * (null for AnyRef, 0 for floats and integers). + * + * Note: this is the fastest way to retrieve a value that may or + * may not exist, if the default null/zero is acceptable. For key/value + * pairs that do exist, `apply` (i.e. `map(key)`) is equally fast. + */ + def getOrNull(key: Long): V = { + if (key == -key) { + if ((((key>>>63).toInt+1) & extraKeys) == 0) null.asInstanceOf[V] + else if (key == 0) zeroValue.asInstanceOf[V] + else minValue.asInstanceOf[V] + } + else { + val i = seekEntry(key) + if (i < 0) null.asInstanceOf[V] else _values(i).asInstanceOf[V] + } + } + + /** Retrieves the value associated with a key. + * If the key does not exist in the map, the `defaultEntry` for that key + * will be returned instead. + */ + override def apply(key: Long): V = { + if (key == -key) { + if ((((key>>>63).toInt+1) & extraKeys) == 0) defaultEntry(key) + else if (key == 0) zeroValue.asInstanceOf[V] + else minValue.asInstanceOf[V] + } + else { + val i = seekEntry(key) + if (i < 0) defaultEntry(key) else _values(i).asInstanceOf[V] + } + } + + /** The user-supplied default value for the key. Throws an exception + * if no other default behavior was specified. + */ + override def default(key: Long) = defaultEntry(key) + + private def repack(newMask: Int) { + val ok = _keys + val ov = _values + mask = newMask + _keys = new Array[Long](mask+1) + _values = new Array[AnyRef](mask+1) + _vacant = 0 + var i = 0 + while (i < ok.length) { + val k = ok(i) + if (k != -k) { + val j = seekEmpty(k) + _keys(j) = k + _values(j) = ov(i) + } + i += 1 + } + } + + /** Repacks the contents of this `LongMap` for maximum efficiency of lookup. + * + * For maps that undergo a complex creation process with both addition and + * removal of keys, and then are used heavily with no further removal of + * elements, calling `repack` after the end of the creation can result in + * improved performance. Repacking takes time proportional to the number + * of entries in the map. + */ + def repack() { + var m = mask + if (_size + _vacant >= 0.5*mask && !(_vacant > 0.2*mask)) m = ((m << 1) + 1) & IndexMask + while (m > 8 && 8*_size < m) m = m >>> 1 + repack(m) + } + + override def put(key: Long, value: V): Option[V] = { + if (key == -key) { + if (key == 0) { + val ans = if ((extraKeys&1) == 1) Some(zeroValue.asInstanceOf[V]) else None + zeroValue = value.asInstanceOf[AnyRef] + extraKeys |= 1 + ans + } + else { + val ans = if ((extraKeys&2) == 1) Some(minValue.asInstanceOf[V]) else None + minValue = value.asInstanceOf[AnyRef] + extraKeys |= 2 + ans + } + } + else { + val i = seekEntryOrOpen(key) + if (i < 0) { + val j = i & IndexMask + _keys(j) = key + _values(j) = value.asInstanceOf[AnyRef] + _size += 1 + if ((i & VacantBit) != 0) _vacant -= 1 + else if (imbalanced) repack() + None + } + else { + val ans = Some(_values(i).asInstanceOf[V]) + _keys(i) = key + _values(i) = value.asInstanceOf[AnyRef] + ans + } + } + } + + /** Updates the map to include a new key-value pair. + * + * This is the fastest way to add an entry to a `LongMap`. + */ + override def update(key: Long, value: V): Unit = { + if (key == -key) { + if (key == 0) { + zeroValue = value.asInstanceOf[AnyRef] + extraKeys |= 1 + } + else { + minValue = value.asInstanceOf[AnyRef] + extraKeys |= 2 + } + } + else { + var i = seekEntryOrOpen(key) + if (i < 0) { + val j = i & IndexMask + _keys(j) = key + _values(j) = value.asInstanceOf[AnyRef] + _size += 1 + if ((i & VacantBit) != 0) _vacant -= 1 + else if (imbalanced) repack() + } + else { + _keys(i) = key + _values(i) = value.asInstanceOf[AnyRef] + } + } + } + + /** Adds a new key/value pair to this map and returns the map. */ + def +=(key: Long, value: V): this.type = { update(key, value); this } + + def +=(kv: (Long, V)): this.type = { update(kv._1, kv._2); this } + + def -=(key: Long): this.type = { + if (key == -key) { + if (key == 0L) { + extraKeys &= 0x2 + zeroValue = null + } + else { + extraKeys &= 0x1 + minValue = null + } + } + else { + val i = seekEntry(key) + if (i >= 0) { + _size -= 1 + _vacant += 1 + _keys(i) = Long.MinValue + _values(i) = null + } + } + this + } + + def iterator: Iterator[(Long, V)] = new Iterator[(Long, V)] { + private[this] val kz = _keys + private[this] val vz = _values + + private[this] var nextPair: (Long, V) = + if (extraKeys==0) null + else if ((extraKeys&1)==1) (0L, zeroValue.asInstanceOf[V]) + else (Long.MinValue, minValue.asInstanceOf[V]) + + private[this] var anotherPair: (Long, V) = + if (extraKeys==3) (Long.MinValue, minValue.asInstanceOf[V]) + else null + + private[this] var index = 0 + + def hasNext: Boolean = nextPair != null || (index < kz.length && { + var q = kz(index) + while (q == -q) { + index += 1 + if (index >= kz.length) return false + q = kz(index) + } + nextPair = (kz(index), vz(index).asInstanceOf[V]) + index += 1 + true + }) + def next = { + if (nextPair == null && !hasNext) throw new NoSuchElementException("next") + val ans = nextPair + if (anotherPair != null) { + nextPair = anotherPair + anotherPair = null + } + nextPair = null + ans + } + } + + override def foreach[A](f: ((Long,V)) => A) { + var i,j = 0 + while (i < _keys.length & j < _size) { + val k = _keys(i) + if (k != -k) { + j += 1 + f((k, _values(i).asInstanceOf[V])) + } + i += 1 + } + if ((extraKeys & 1) == 1) f((0L, zeroValue.asInstanceOf[V])) + if ((extraKeys & 2) == 2) f((Long.MinValue, minValue.asInstanceOf[V])) + } + + override def clone(): LongMap[V] = { + val kz = java.util.Arrays.copyOf(_keys, _keys.length) + val vz = java.util.Arrays.copyOf(_values, _values.length) + val lm = new LongMap[V](defaultEntry, 1, false) + lm.initializeTo(mask, extraKeys, zeroValue, minValue, _size, _vacant, kz, vz) + lm + } + + /** Applies a function to all keys of this map. */ + def foreachKey[A](f: Long => A) { + var i,j = 0 + while (i < _keys.length & j < _size) { + val k = _keys(i) + if (k != -k) { + j += 1 + f(k) + } + i += 1 + } + if ((extraKeys & 1) == 1) f(0L) + if ((extraKeys & 2) == 2) f(Long.MinValue) + } + + /** Applies a function to all values of this map. */ + def foreachValue[A](f: V => A) { + var i,j = 0 + while (i < _keys.length & j < _size) { + val k = _keys(i) + if (k != -k) { + j += 1 + f(_values(i).asInstanceOf[V]) + } + i += 1 + } + if ((extraKeys & 1) == 1) f(zeroValue.asInstanceOf[V]) + if ((extraKeys & 2) == 2) f(minValue.asInstanceOf[V]) + } + + /** Creates a new `LongMap` with different values. + * Unlike `mapValues`, this method generates a new + * collection immediately. + */ + def mapValuesNow[V1](f: V => V1): LongMap[V1] = { + val lm = new LongMap[V1](LongMap.exceptionDefault, 1, false) + val kz = java.util.Arrays.copyOf(_keys, _keys.length) + val vz = new Array[AnyRef](_values.length) + var i,j = 0 + while (i < _keys.length & j < _size) { + val k = _keys(i) + if (k != -k) { + j += 1 + vz(i) = f(_values(i).asInstanceOf[V]).asInstanceOf[AnyRef] + } + i += 1 + } + val zv = if ((extraKeys & 1) == 1) f(zeroValue.asInstanceOf[V]).asInstanceOf[AnyRef] else null + val mv = if ((extraKeys & 2) == 2) f(minValue.asInstanceOf[V]).asInstanceOf[AnyRef] else null + lm.initializeTo(mask, extraKeys, zv, mv, _size, _vacant, kz, vz) + lm + } + + /** Applies a transformation function to all values stored in this map. + * Note: the default, if any, is not transformed. + */ + def transformValues(f: V => V): this.type = { + var i,j = 0 + while (i < _keys.length & j < _size) { + val k = _keys(i) + if (k != -k) { + j += 1 + _values(i) = f(_values(i).asInstanceOf[V]).asInstanceOf[AnyRef] + } + i += 1 + } + if ((extraKeys & 1) == 1) zeroValue = f(zeroValue.asInstanceOf[V]).asInstanceOf[AnyRef] + if ((extraKeys & 2) == 2) minValue = f(minValue.asInstanceOf[V]).asInstanceOf[AnyRef] + this + } + + /* + override def toString = { + val sb = new StringBuilder("LongMap(") + var n = 0 + foreach{ case (k,v) => + if (n > 0) sb ++= ", " + sb ++= k.toString + sb ++= " -> " + sb ++= v.toString + n += 1 + } + sb += ')' + sb.result + } + */ +} + +object LongMap { + private final val IndexMask = 0x3FFFFFFF + private final val MissingBit = 0x80000000 + private final val VacantBit = 0x40000000 + private final val MissVacant = 0xC0000000 + + private val exceptionDefault: Long => Nothing = (k: Long) => throw new NoSuchElementException(k.toString) + + implicit def canBuildFrom[V, U]: CanBuildFrom[LongMap[V], (Long, U), LongMap[U]] = + new CanBuildFrom[LongMap[V], (Long, U), LongMap[U]] { + def apply(from: LongMap[V]): LongMapBuilder[U] = apply() + def apply(): LongMapBuilder[U] = new LongMapBuilder[U] + } + + final class LongMapBuilder[V] extends Builder[(Long, V), LongMap[V]] { + private[collection] var elems: LongMap[V] = new LongMap[V] + def +=(entry: (Long, V)): this.type = { + elems += entry + this + } + def clear() { elems = new LongMap[V] } + def result(): LongMap[V] = elems + } + + /** Creates a new `LongMap` with zero or more key/value pairs. */ + def apply[V](elems: (Long, V)*): LongMap[V] = { + val sz = if (elems.hasDefiniteSize) elems.size else 4 + val lm = new LongMap[V](sz * 2) + elems.foreach{ case (k,v) => lm(k) = v } + if (lm.size < (sz>>3)) lm.repack() + lm + } + + /** Creates a new empty `LongMap`. */ + def empty[V]: LongMap[V] = new LongMap[V] + + /** Creates a new empty `LongMap` with the supplied default */ + def withDefault[V](default: Long => V): LongMap[V] = new LongMap[V](default) + + /** Creates a new `LongMap` from arrays of keys and values. + * Equivalent to but more efficient than `LongMap((keys zip values): _*)`. + */ + def fromZip[V](keys: Array[Long], values: Array[V]): LongMap[V] = { + val sz = math.min(keys.length, values.length) + val lm = new LongMap[V](sz * 2) + var i = 0 + while (i < sz) { lm(keys(i)) = values(i); i += 1 } + if (lm.size < (sz>>3)) lm.repack() + lm + } + + /** Creates a new `LongMap` from keys and values. + * Equivalent to but more efficient than `LongMap((keys zip values): _*)`. + */ + def fromZip[V](keys: Iterable[Long], values: Iterable[V]): LongMap[V] = { + val sz = math.min(keys.size, values.size) + val lm = new LongMap[V](sz * 2) + val ki = keys.iterator + val vi = values.iterator + while (ki.hasNext && vi.hasNext) lm(ki.next) = vi.next + if (lm.size < (sz >> 3)) lm.repack() + lm + } +} diff --git a/test/files/run/mutable-longmap.scala b/test/files/run/mutable-longmap.scala new file mode 100644 index 0000000000..07fd80f6f0 --- /dev/null +++ b/test/files/run/mutable-longmap.scala @@ -0,0 +1,79 @@ +object Test extends App { + + import scala.collection.mutable.HashMap; + import scala.collection.mutable.LongMap; + + val keys = Array( + Long.MinValue, Int.MinValue - 1L, Int.MinValue, -9127, -1, + 0, 1, 9127, Int.MaxValue, Long.MaxValue + ) + + val rn = new scala.util.Random(42L) + var lm = LongMap.empty[Long] + val hm = HashMap.empty[Long,Long] + + def checkConsistent = hm.forall{ case (k,v) => lm.get(k).exists(_ == v) } + + assert { + (0 to 10000).forall{ i => + val k = keys(rn.nextInt(keys.length)) + if (rn.nextInt(100) < 2) lm = lm.clone() + if (rn.nextInt(100) < 5) lm.repack() + if (rn.nextBoolean) { + hm += ((k, i)) + rn.nextInt(6) match { + case 0 => lm += ((k, i)) + case 1 => lm += (k, i) + case 2 => lm(k) = i + case 3 => lm.put(k,i) + case 4 => lm ++= List((k,i)) + case _ => if (!lm.contains(k)) lm.getOrElseUpdate(k,i) + else lm += (k,i) + } + } + else { + hm -= k + rn.nextInt(2) match { + case 0 => lm -= k + case _ => lm --= List(k) + } + } + checkConsistent + } + } + + assert { + lm.map{ case (k,v) => -k*k -> v.toString }.getClass == lm.getClass + } + + assert { + val lm2 = new LongMap[Unit](2000000) + for (i <- 0 until 1000000) lm2(i) = () + + lm2.size == 1000000 && + (0 to 1100000 by 100000).forall(i => (lm2 contains i) == i < 1000000) + } + + lm = LongMap(8L -> 22L, -5L -> 5L, Long.MinValue -> 0L) + + assert{ var s = 0L; lm.foreachKey(s += _); s == Long.MinValue + 3 } + assert{ var s = 0L; lm.foreachValue(s += _); s == 27L } + assert { + val m2 = lm.mapValuesNow(_+2) + lm.transformValues(_+2) + m2 == lm && !(m2 eq lm) && (for ((_,v) <- lm) yield v).sum == 33L + } + + assert { + val lm2 = new LongMap[String](_.toString) + lm2 += (5L -> "fish", 0L -> "unicorn") + val hm2 = (new HashMap[Long,String]) ++= lm2 + + List(Long.MinValue, 0L, 1L, 5L).forall(i => + lm2.get(i) == hm2.get(i) && + lm2.getOrElse(i, "") == hm2.getOrElse(i, "") && + lm2(i) == hm2.get(i).getOrElse(i.toString) && + lm2.getOrNull(i) == hm2.get(i).orNull + ) + } +} -- cgit v1.2.3 From 992b90ebcc8cd3415c288a7258015ec35ed9098e Mon Sep 17 00:00:00 2001 From: Rex Kerr Date: Mon, 11 Nov 2013 03:45:48 -0800 Subject: New AnyRefMap fixes SI-5263 to the extent practical. An open addressing hash map based on a similar scheme to mutable.LongMap. It delivers performance equivalent to Java's HashMap for pretty much all AnyRefs, plus it works nicely with Scala's collections. Revisions made sure that all return types in the public API are specified. Also switched to a leading-zeros method of calculating the initial mask (to save a few ns). Also edited out java.util comparison numbers and moved constants to companion. --- .../scala/collection/mutable/AnyRefMap.scala | 451 +++++++++++++++++++++ test/files/run/mutable-anyrefmap.scala | 91 +++++ 2 files changed, 542 insertions(+) create mode 100644 src/library/scala/collection/mutable/AnyRefMap.scala create mode 100644 test/files/run/mutable-anyrefmap.scala diff --git a/src/library/scala/collection/mutable/AnyRefMap.scala b/src/library/scala/collection/mutable/AnyRefMap.scala new file mode 100644 index 0000000000..df74bb5187 --- /dev/null +++ b/src/library/scala/collection/mutable/AnyRefMap.scala @@ -0,0 +1,451 @@ +package scala +package collection +package mutable + +import generic.CanBuildFrom + +/** This class implements mutable maps with `AnyRef` keys based on a hash table with open addressing. + * + * Basic map operations on single entries, including `contains` and `get`, + * are typically significantly faster with `AnyRefMap` than [[HashMap]]. + * Note that numbers and characters are not handled specially in AnyRefMap; + * only plain `equals` and `hashCode` are used in comparisons. + * + * Methods that traverse or regenerate the map, including `foreach` and `map`, + * are not in general faster than with `HashMap`. The methods `foreachKey`, + * `foreachValue`, `mapValuesNow`, and `transformValues` are, however, faster + * than alternative ways to achieve the same functionality. + * + * Maps with open addressing may become less efficient at lookup after + * repeated addition/removal of elements. Although `AnyRefMap` makes a + * decent attempt to remain efficient regardless, calling `repack` + * on a map that will no longer have elements removed but will be + * used heavily may save both time and storage space. + * + * This map is not indended to contain more than 2^29 entries (approximately + * 500 million). The maximum capacity is 2^30, but performance will degrade + * rapidly as 2^30 is approached. + * + */ +final class AnyRefMap[K <: AnyRef, V] private[collection] (defaultEntry: K => V, initialBufferSize: Int, initBlank: Boolean) +extends AbstractMap[K, V] + with Map[K, V] + with MapLike[K, V, AnyRefMap[K, V]] +{ + import AnyRefMap._ + def this() = this(AnyRefMap.exceptionDefault, 16, true) + + /** Creates a new `AnyRefMap` that returns default values according to a supplied key-value mapping. */ + def this(defaultEntry: K => V) = this(defaultEntry, 16, true) + + /** Creates a new `AnyRefMap` with an initial buffer of specified size. + * + * An `AnyRefMap` can typically contain half as many elements as its buffer size + * before it requires resizing. + */ + def this(initialBufferSize: Int) = this(AnyRefMap.exceptionDefault, initialBufferSize, true) + + /** Creates a new `AnyRefMap` with specified default values and initial buffer size. */ + def this(defaultEntry: K => V, initialBufferSize: Int) = this(defaultEntry, initialBufferSize, true) + + private[this] var mask = 0 + private[this] var _size = 0 + private[this] var _vacant = 0 + private[this] var _hashes: Array[Int] = null + private[this] var _keys: Array[AnyRef] = null + private[this] var _values: Array[AnyRef] = null + + if (initBlank) defaultInitialize(initialBufferSize) + + private[this] def defaultInitialize(n: Int) { + mask = + if (n<0) 0x7 + else (((1 << (32 - java.lang.Integer.numberOfLeadingZeros(n-1))) - 1) & 0x3FFFFFFF) | 0x7 + _hashes = new Array[Int](mask+1) + _keys = new Array[AnyRef](mask+1) + _values = new Array[AnyRef](mask+1) + } + + private[collection] def initializeTo( + m: Int, sz: Int, vc: Int, hz: Array[Int], kz: Array[AnyRef], vz: Array[AnyRef] + ) { + mask = m; _size = sz; _vacant = vc; _hashes = hz; _keys = kz; _values = vz + } + + override def size: Int = _size + override def empty: AnyRefMap[K,V] = new AnyRefMap(defaultEntry) + + private def imbalanced: Boolean = + (_size + _vacant) > 0.5*mask || _vacant > _size + + private def hashOf(key: K): Int = { + if (key eq null) 0x41081989 + else { + val h = key.hashCode + // Part of the MurmurHash3 32 bit finalizer + val i = (h ^ (h >>> 16)) * 0x85EBCA6B + val j = (i ^ (i >>> 13)) + if (j==0) 0x41081989 else j & 0x7FFFFFFF + } + } + + private def seekEntry(h: Int, k: AnyRef): Int = { + var e = h & mask + var x = 0 + var g = 0 + while ({ g = _hashes(e); g != 0}) { + if (g == h && { val q = _keys(e); (q eq k) || ((q ne null) && (q equals k)) }) return e + x += 1 + e = (e + 2*(x+1)*x - 3) & mask + } + e | MissingBit + } + + private def seekEntryOrOpen(h: Int, k: AnyRef): Int = { + var e = h & mask + var x = 0 + var g = 0 + var o = -1 + while ({ g = _hashes(e); g != 0}) { + if (g == h && { val q = _keys(e); (q eq k) || ((q ne null) && (q equals k)) }) return e + else if (o == -1 && g+g == 0) o = e + x += 1 + e = (e + 2*(x+1)*x - 3) & mask + } + if (o >= 0) o | MissVacant else e | MissingBit + } + + override def contains(key: K): Boolean = seekEntry(hashOf(key), key) >= 0 + + override def get(key: K): Option[V] = { + val i = seekEntry(hashOf(key), key) + if (i < 0) None else Some(_values(i).asInstanceOf[V]) + } + + override def getOrElse[V1 >: V](key: K, default: => V1): V1 = { + val i = seekEntry(hashOf(key), key) + if (i < 0) default else _values(i).asInstanceOf[V] + } + + override def getOrElseUpdate(key: K, defaultValue: => V): V = { + val h = hashOf(key) + val i = seekEntryOrOpen(h, key) + if (i < 0) { + val value = defaultValue + _size += 1 + val j = i & IndexMask + _hashes(j) = h + _keys(j) = key.asInstanceOf[AnyRef] + _values(j) = value.asInstanceOf[AnyRef] + if ((i & VacantBit) != 0) _vacant -= 1 + else if (imbalanced) repack() + value + } + else _values(i).asInstanceOf[V] + } + + /** Retrieves the value associated with a key, or the default for that type if none exists + * (null for AnyRef, 0 for floats and integers). + * + * Note: this is the fastest way to retrieve a value that may or + * may not exist, if the default null/zero is acceptable. For key/value + * pairs that do exist, `apply` (i.e. `map(key)`) is equally fast. + */ + def getOrNull(key: K): V = { + val i = seekEntry(hashOf(key), key) + (if (i < 0) null else _values(i)).asInstanceOf[V] + } + + /** Retrieves the value associated with a key. + * If the key does not exist in the map, the `defaultEntry` for that key + * will be returned instead; an exception will be thrown if no + * `defaultEntry` was supplied. + */ + override def apply(key: K): V = { + val i = seekEntry(hashOf(key), key) + if (i < 0) defaultEntry(key) else _values(i).asInstanceOf[V] + } + + /** Defers to defaultEntry to find a default value for the key. Throws an + * exception if no other default behavior was specified. + */ + override def default(key: K) = defaultEntry(key) + + private def repack(newMask: Int) { + val oh = _hashes + val ok = _keys + val ov = _values + mask = newMask + _hashes = new Array[Int](mask+1) + _keys = new Array[AnyRef](mask+1) + _values = new Array[AnyRef](mask+1) + _vacant = 0 + var i = 0 + while (i < oh.length) { + val h = oh(i) + if (h+h != 0) { + var e = h & mask + var x = 0 + while (_hashes(e) != 0) { x += 1; e = (e + 2*(x+1)*x - 3) & mask } + _hashes(e) = h + _keys(e) = ok(i) + _values(e) = ov(i) + } + i += 1 + } + } + + /** Repacks the contents of this `AnyRefMap` for maximum efficiency of lookup. + * + * For maps that undergo a complex creation process with both addition and + * removal of keys, and then are used heavily with no further removal of + * elements, calling `repack` after the end of the creation can result in + * improved performance. Repacking takes time proportional to the number + * of entries in the map. + */ + def repack() { + var m = mask + if (_size + _vacant >= 0.5*mask && !(_vacant > 0.2*mask)) m = ((m << 1) + 1) & IndexMask + while (m > 8 && 8*_size < m) m = m >>> 1 + repack(m) + } + + override def put(key: K, value: V): Option[V] = { + val h = hashOf(key) + val k = key + var i = seekEntryOrOpen(h, k) + if (i < 0) { + val j = i & IndexMask + _hashes(j) = h + _keys(j) = k + _values(j) = value.asInstanceOf[AnyRef] + _size += 1 + if ((i & VacantBit) != 0) _vacant -= 1 + else if (imbalanced) repack() + None + } + else { + val ans = Some(_values(i).asInstanceOf[V]) + _hashes(i) = h + _keys(i) = k + _values(i) = value.asInstanceOf[AnyRef] + ans + } + } + + /** Updates the map to include a new key-value pair. + * + * This is the fastest way to add an entry to an `AnyRefMap`. + */ + override def update(key: K, value: V): Unit = { + val h = hashOf(key) + val k = key + var i = seekEntryOrOpen(h, k) + if (i < 0) { + val j = i & IndexMask + _hashes(j) = h + _keys(j) = k + _values(j) = value.asInstanceOf[AnyRef] + _size += 1 + if ((i & VacantBit) != 0) _vacant -= 1 + else if (imbalanced) repack() + } + else { + _hashes(i) = h + _keys(i) = k + _values(i) = value.asInstanceOf[AnyRef] + } + } + + /** Adds a new key/value pair to this map and returns the map. */ + def +=(key: K, value: V): this.type = { update(key, value); this } + + def +=(kv: (K, V)): this.type = { update(kv._1, kv._2); this } + + def -=(key: K): this.type = { + val i = seekEntry(hashOf(key), key) + if (i >= 0) { + _size -= 1 + _vacant += 1 + _hashes(i) = Int.MinValue + _keys(i) = null + _values(i) = null + } + this + } + + def iterator: Iterator[(K, V)] = new Iterator[(K, V)] { + private[this] val hz = _hashes + private[this] val kz = _keys + private[this] val vz = _values + + private[this] var index = 0 + private[this] var found = false + + def hasNext = found || (index A) { + var i = 0 + var e = _size + while (e > 0) { + while(i < _hashes.length && { val h = _hashes(i); h+h == 0 && i < _hashes.length}) i += 1 + if (i < _hashes.length) { + f((_keys(i).asInstanceOf[K], _values(i).asInstanceOf[V])) + i += 1 + e -= 1 + } + else return + } + } + + override def clone(): AnyRefMap[K, V] = { + val hz = java.util.Arrays.copyOf(_hashes, _hashes.length) + val kz = java.util.Arrays.copyOf(_keys, _keys.length) + val vz = java.util.Arrays.copyOf(_values, _values.length) + val arm = new AnyRefMap[K, V](defaultEntry, 1, false) + arm.initializeTo(mask, _size, _vacant, hz, kz, vz) + arm + } + + private[this] def foreachElement[A,B](elems: Array[AnyRef], f: A => B) { + var i,j = 0 + while (i < _hashes.length & j < _size) { + val h = _hashes(i) + if (h+h != 0) { + j += 1 + f(elems(i).asInstanceOf[A]) + } + i += 1 + } + } + + /** Applies a function to all keys of this map. */ + def foreachKey[A](f: K => A) { foreachElement[K,A](_keys, f) } + + /** Applies a function to all values of this map. */ + def foreachValue[A](f: V => A) { foreachElement[V,A](_values, f) } + + /** Creates a new `AnyRefMap` with different values. + * Unlike `mapValues`, this method generates a new + * collection immediately. + */ + def mapValuesNow[V1](f: V => V1): AnyRefMap[K, V1] = { + val arm = new AnyRefMap[K,V1](AnyRefMap.exceptionDefault, 1, false) + val hz = java.util.Arrays.copyOf(_hashes, _hashes.length) + val kz = java.util.Arrays.copyOf(_keys, _keys.length) + val vz = new Array[AnyRef](_values.length) + var i,j = 0 + while (i < _hashes.length & j < _size) { + val h = _hashes(i) + if (h+h != 0) { + j += 1 + vz(i) = f(_values(i).asInstanceOf[V]).asInstanceOf[AnyRef] + } + i += 1 + } + arm.initializeTo(mask, _size, _vacant, hz, kz, vz) + arm + } + + /** Applies a transformation function to all values stored in this map. + * Note: the default, if any, is not transformed. + */ + def transformValues(f: V => V): this.type = { + var i,j = 0 + while (i < _hashes.length & j < _size) { + val h = _hashes(i) + if (h+h != 0) { + j += 1 + _values(i) = f(_values(i).asInstanceOf[V]).asInstanceOf[AnyRef] + } + i += 1 + } + this + } + +} + +object AnyRefMap { + private final val IndexMask = 0x3FFFFFFF + private final val MissingBit = 0x80000000 + private final val VacantBit = 0x40000000 + private final val MissVacant = 0xC0000000 + + private val exceptionDefault = (k: Any) => throw new NoSuchElementException(if (k == null) "(null)" else k.toString) + + implicit def canBuildFrom[K <: AnyRef, V, J <: AnyRef, U]: CanBuildFrom[AnyRefMap[K,V], (J, U), AnyRefMap[J,U]] = + new CanBuildFrom[AnyRefMap[K,V], (J, U), AnyRefMap[J,U]] { + def apply(from: AnyRefMap[K,V]): AnyRefMapBuilder[J, U] = apply() + def apply(): AnyRefMapBuilder[J, U] = new AnyRefMapBuilder[J, U] + } + + final class AnyRefMapBuilder[K <: AnyRef, V] extends Builder[(K, V), AnyRefMap[K, V]] { + private[collection] var elems: AnyRefMap[K, V] = new AnyRefMap[K, V] + def +=(entry: (K, V)): this.type = { + elems += entry + this + } + def clear() { elems = new AnyRefMap[K, V] } + def result(): AnyRefMap[K, V] = elems + } + + /** Creates a new `AnyRefMap` with zero or more key/value pairs. */ + def apply[K <: AnyRef, V](elems: (K, V)*): AnyRefMap[K, V] = { + val sz = if (elems.hasDefiniteSize) elems.size else 4 + val arm = new AnyRefMap[K, V](sz * 2) + elems.foreach{ case (k,v) => arm(k) = v } + if (arm.size < (sz>>3)) arm.repack() + arm + } + + /** Creates a new empty `AnyRefMap`. */ + def empty[K <: AnyRef, V]: AnyRefMap[K, V] = new AnyRefMap[K, V] + + /** Creates a new empty `AnyRefMap` with the supplied default */ + def withDefault[K <: AnyRef, V](default: K => V): AnyRefMap[K, V] = new AnyRefMap[K, V](default) + + /** Creates a new `AnyRefMap` from arrays of keys and values. + * Equivalent to but more efficient than `AnyRefMap((keys zip values): _*)`. + */ + def fromZip[K <: AnyRef, V](keys: Array[K], values: Array[V]): AnyRefMap[K, V] = { + val sz = math.min(keys.length, values.length) + val arm = new AnyRefMap[K, V](sz * 2) + var i = 0 + while (i < sz) { arm(keys(i)) = values(i); i += 1 } + if (arm.size < (sz>>3)) arm.repack() + arm + } + + /** Creates a new `AnyRefMap` from keys and values. + * Equivalent to but more efficient than `AnyRefMap((keys zip values): _*)`. + */ + def fromZip[K <: AnyRef, V](keys: Iterable[K], values: Iterable[V]): AnyRefMap[K, V] = { + val sz = math.min(keys.size, values.size) + val arm = new AnyRefMap[K, V](sz * 2) + val ki = keys.iterator + val vi = values.iterator + while (ki.hasNext && vi.hasNext) arm(ki.next) = vi.next + if (arm.size < (sz >> 3)) arm.repack() + arm + } +} diff --git a/test/files/run/mutable-anyrefmap.scala b/test/files/run/mutable-anyrefmap.scala new file mode 100644 index 0000000000..ff615d0daf --- /dev/null +++ b/test/files/run/mutable-anyrefmap.scala @@ -0,0 +1,91 @@ +object Test extends App { + + import scala.collection.mutable.HashMap; + import scala.collection.mutable.AnyRefMap; + + val keys = Array( + null, "perch", "herring", "salmon", "pike", "cod", "" + ) + + val rn = new scala.util.Random(42L) + var arm = AnyRefMap.empty[String, Int] + val hm = HashMap.empty[String, Int] + + def checkConsistent = hm.forall{ case (k,v) => arm.get(k).exists(_ == v) } + + assert { + (0 to 10000).forall{ i => + val k = keys(rn.nextInt(keys.length)) + if (rn.nextInt(100) < 2) arm = arm.clone() + if (rn.nextInt(100) < 5) arm.repack() + if (rn.nextBoolean) { + hm += ((k, i)) + rn.nextInt(6) match { + case 0 => arm += ((k, i)) + case 1 => arm += (k, i) + case 2 => arm(k) = i + case 3 => arm.put(k,i) + case 4 => arm ++= List((k,i)) + case _ => if (!arm.contains(k)) arm.getOrElseUpdate(k,i) + else arm += (k,i) + } + } + else { + hm -= k + rn.nextInt(2) match { + case 0 => arm -= k + case _ => arm --= List(k) + } + } + checkConsistent + } + } + + assert { + val mapped = + arm.map{ case (k,v) => (if (k==null) "" else k+k) -> v.toString } + mapped.getClass == arm.getClass + } + + assert { + val arm2 = new AnyRefMap[java.lang.Integer,Unit](2000000) + for (i <- 0 until 1000000) arm2(java.lang.Integer.valueOf(i)) = () + + arm2.size == 1000000 && + (0 to 1100000 by 100000).map(java.lang.Integer.valueOf).forall(i => (arm2 contains i) == i < 1000000) + } + + arm = AnyRefMap("heron" -> 22, "dove" -> 5, "budgie" -> 0) + + assert{ + var s = "" + arm.foreachKey(s += _) + + s.length == "herondovebudgie".length && + s.contains("heron") && + s.contains("dove") && + s.contains("budgie") + } + + assert{ var s = 0L; arm.foreachValue(s += _); s == 27L } + + assert { + val m2 = arm.mapValuesNow(_+2) + arm.transformValues(_+2) + m2 == arm && !(m2 eq arm) && (for ((_,v) <- arm) yield v).sum == 33L + } + + assert { + val arm2 = new AnyRefMap[String, String](x => if (x==null) "null" else x) + arm2 += ("cod" -> "fish", "Rarity" -> "unicorn") + val hm2 = (new HashMap[String,String]) ++= arm2 + + List(null, "cod", "sparrow", "Rarity").forall(i => + arm2.get(i) == hm2.get(i) && + arm2.getOrElse(i, "") == hm2.getOrElse(i, "") && + arm2(i) == hm2.get(i).getOrElse(if (i==null) "null" else i.toString) && + arm2.getOrNull(i) == hm2.get(i).orNull + ) + } +} + -- cgit v1.2.3