summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jason Zaugg <jzaugg@gmail.com> 2016-11-30 14:54:28 +1000
committer GitHub <noreply@github.com> 2016-11-30 14:54:28 +1000
commit 711e261717c37dcc56e4f67230a4e8b4f5500700 (patch)
tree 963d71947e5ae606d148c5dd8ffe8b8e62aa7631
parent d0da258116109272e5f76ea1722af5458bd588e3 (diff)
parent a5014447861a5678c8b595e235019bb8fec098a7 (diff)
download scala-711e261717c37dcc56e4f67230a4e8b4f5500700.tar.gz
scala-711e261717c37dcc56e4f67230a4e8b4f5500700.tar.bz2
scala-711e261717c37dcc56e4f67230a4e8b4f5500700.zip
Merge pull request #5537 from paplorinc/hashTableIndex
Optimized HashTable.index
-rw-r--r-- src/library/scala/collection/mutable/HashTable.scala 83
-rw-r--r-- test/benchmarks/src/main/scala/scala/BitManipulationBenchmark.scala 170
2 files changed, 194 insertions, 59 deletions
diff --git a/src/library/scala/collection/mutable/HashTable.scala b/src/library/scala/collection/mutable/HashTable.scala
index a6a6e1e432..776eafaccc 100644
--- a/src/library/scala/collection/mutable/HashTable.scala
+++ b/src/library/scala/collection/mutable/HashTable.scala
@@ -360,14 +360,14 @@ trait HashTable[A, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashU
protected def elemEquals(key1: A, key2: A): Boolean = (key1 == key2)
- // Note:
- // we take the most significant bits of the hashcode, not the lower ones
- // this is of crucial importance when populating the table in parallel
- protected final def index(hcode: Int) = {
+ /**
+ * Maps a hash code to a slot index of `table`.
+ *
+ * Note: we take the most significant bits of the hashcode, not the lower ones;
+ * this is of crucial importance when populating the table in parallel.
+ *
+ * @param hcode the hash code to map; it is first passed through `improve` together with `seedvalue`
+ * @return the improved hash, shifted right (unsigned) by the number of leading zero bits of
+ * `table.length - 1` and then masked with `table.length - 1`
+ */
+ protected final def index(hcode: Int): Int = {
val ones = table.length - 1
- val improved = improve(hcode, seedvalue)
- val shifted = (improved >> (32 - java.lang.Integer.bitCount(ones))) & ones
- shifted
+ // NOTE(review): using `ones` as a bit mask presumably relies on `table.length` being a power of two - confirm.
+ val exponent = Integer.numberOfLeadingZeros(ones)
+ (improve(hcode, seedvalue) >>> exponent) & ones
}
protected def initWithContents(c: HashTable.Contents[A, Entry]) = {
@@ -411,58 +411,23 @@ private[collection] object HashTable {
protected def elemHashCode(key: KeyType) = key.##
- protected final def improve(hcode: Int, seed: Int) = {
- /* Murmur hash
- * m = 0x5bd1e995
- * r = 24
- * note: h = seed = 0 in mmix
- * mmix(h,k) = k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; */
- // var k = hcode * 0x5bd1e995
- // k ^= k >> 24
- // k *= 0x5bd1e995
- // k
-
- /* Another fast multiplicative hash
- * by Phil Bagwell
- *
- * Comment:
- * Multiplication doesn't affect all the bits in the same way, so we want to
- * multiply twice, "once from each side".
- * It would be ideal to reverse all the bits after the first multiplication,
- * however, this is more costly. We therefore restrict ourselves only to
- * reversing the bytes before final multiplication. This yields a slightly
- * worse entropy in the lower 8 bits, but that can be improved by adding:
- *
- * `i ^= i >> 6`
- *
- * For performance reasons, we avoid this improvement.
- * */
- val i= scala.util.hashing.byteswap32(hcode)
-
- /* Jenkins hash
- * for range 0-10000, output has the msb set to zero */
- // var h = hcode + (hcode << 12)
- // h ^= (h >> 22)
- // h += (h << 4)
- // h ^= (h >> 9)
- // h += (h << 10)
- // h ^= (h >> 2)
- // h += (h << 7)
- // h ^= (h >> 12)
- // h
-
- /* OLD VERSION
- * quick, but bad for sequence 0-10000 - little entropy in higher bits
- * since 2003 */
- // var h: Int = hcode + ~(hcode << 9)
- // h = h ^ (h >>> 14)
- // h = h + (h << 4)
- // h ^ (h >>> 10)
-
- // the rest of the computation is due to SI-5293
- val rotation = seed % 32
- val rotated = (i >>> rotation) | (i << (32 - rotation))
- rotated
+    /**
+     * Defer to a high-quality hash in [[scala.util.hashing]].
+     * The goal is to distribute across bins as well as possible even if a hash code
+     * has low entropy at some bits.
+     *
+     * The result of `byteswap32` is then rotated right by `seed`; only the low five
+     * bits of `seed` influence the rotation. This rotation step is due to SI-5293.
+     *
+     * OLD VERSION - quick, but bad for sequence 0-10000 - little entropy in higher bits - since 2003
+     * {{{
+     * var h: Int = hcode + ~(hcode << 9)
+     * h = h ^ (h >>> 14)
+     * h = h + (h << 4)
+     * h ^ (h >>> 10)
+     * }}}
+     *
+     * @param hcode the raw hash code to scramble
+     * @param seed  rotation distance applied to the scrambled hash (taken modulo 32)
+     * @return the scrambled and rotated hash code
+     */
+    protected final def improve(hcode: Int, seed: Int): Int = {
+      val scrambled = scala.util.hashing.byteswap32(hcode)
+      // rotateRight ignores all but the low five bits of the distance, which is
+      // exactly the `seed & 31` masking a hand-written rotation would need.
+      java.lang.Integer.rotateRight(scrambled, seed)
     }
}
diff --git a/test/benchmarks/src/main/scala/scala/BitManipulationBenchmark.scala b/test/benchmarks/src/main/scala/scala/BitManipulationBenchmark.scala
new file mode 100644
index 0000000000..23e303ede0
--- /dev/null
+++ b/test/benchmarks/src/main/scala/scala/BitManipulationBenchmark.scala
@@ -0,0 +1,170 @@
+package scala.collection
+
+import org.openjdk.jmh.annotations._
+import org.openjdk.jmh.infra._
+import org.openjdk.jmh.runner.IterationType
+import benchmark._
+import java.util.concurrent.TimeUnit
+
+/**
+ * JMH micro-benchmark comparing several ways of computing, for a power of two `v`,
+ * the value `Integer.numberOfLeadingZeros(v - 1)`.
+ *
+ * Every `@Benchmark` method runs one strategy over all entries of `powersOfTwo`
+ * and passes each result to `Blackhole.consume`. Each strategy lives in a private
+ * overload of the same name that takes the `Int` input.
+ */
+@BenchmarkMode(Array(Mode.AverageTime))
+@Fork(2)
+@Threads(1)
+@Warmup(iterations = 10)
+@Measurement(iterations = 10)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Benchmark)
+class BitManipulationBenchmark {
+  /** The inputs of every benchmark: 2^0, 2^1, ..., 2^30 (all positive `Int` powers of two). */
+  val powersOfTwo: Array[Int] = Array.tabulate(31)(1 << _)
+
+  //////////////////////////////////////////////
+
+  /** Measures the `Integer.bitCount` based strategy. */
+  @Benchmark def withIntegerBitCount(bh: Blackhole): Unit = {
+    for (v <- powersOfTwo) {
+      val leadingZeros = withIntegerBitCount(v)
+      // assert (leadingZeros == withLoop(v), s"$leadingZeros != ${withLoop(v)} ($v)")
+      bh.consume(leadingZeros)
+    }
+  }
+
+  /** `v - 1` has one set bit per trailing zero of `v`, so subtracting the bit count from 32 gives the result. */
+  private def withIntegerBitCount(v: Int): Int = Integer.SIZE - Integer.bitCount(v - 1)
+
+  //////////////////////////////////////////////
+
+  /** Measures the direct `Integer.numberOfLeadingZeros` strategy. */
+  @Benchmark def withIntegerNumberOfLeadingZeros(bh: Blackhole): Unit = {
+    for (v <- powersOfTwo) {
+      val leadingZeros = withIntegerNumberOfLeadingZeros(v)
+      // assert (leadingZeros == withLoop(v), s"$leadingZeros != ${withLoop(v)} ($v)")
+      bh.consume(leadingZeros)
+    }
+  }
+
+  /** Delegates straight to the JDK. */
+  private def withIntegerNumberOfLeadingZeros(v: Int): Int = Integer.numberOfLeadingZeros(v - 1)
+
+  //////////////////////////////////////////////
+
+  /** Measures the naive shift-and-count loop; the commented-out assertions in the other benchmarks compare against it. */
+  @Benchmark def withLoop(bh: Blackhole): Unit = {
+    for (v <- powersOfTwo) {
+      val leadingZeros = withLoop(v)
+      bh.consume(leadingZeros)
+    }
+  }
+
+  /**
+   * Starts at 32 and subtracts one for every bit position above bit 0 that has
+   * to be shifted out before the value becomes zero.
+   *
+   * The shifts are unsigned (`>>>`) so that the loop also terminates for negative
+   * arguments; an arithmetic shift would get stuck at -1 forever.
+   */
+  private def withLoop(v: Int): Int = {
+    var r = Integer.SIZE
+    var copy = v >>> 1
+    while (copy != 0) {
+      r -= 1
+      copy = copy >>> 1
+    }
+    r
+  }
+
+  //////////////////////////////////////////////
+
+  /** Measures the pattern-match lookup strategy. */
+  @Benchmark def withMatch(bh: Blackhole): Unit = {
+    for (v <- powersOfTwo) {
+      val leadingZeros = withMatch(v)
+      // assert (leadingZeros == withLoop(v), s"$leadingZeros != ${withLoop(v)} ($v)")
+      bh.consume(leadingZeros)
+    }
+  }
+
+  /**
+   * One `case` per power of two from 2^0 to 2^30.
+   * There is deliberately no default case: any other argument throws a `MatchError`.
+   */
+  private def withMatch(i: Int): Int = i match {
+    case 1 => 32
+    case 2 => 31
+    case 4 => 30
+    case 8 => 29
+    case 16 => 28
+    case 32 => 27
+    case 64 => 26
+    case 128 => 25
+    case 256 => 24
+    case 512 => 23
+    case 1024 => 22
+    case 2048 => 21
+    case 4096 => 20
+    case 8192 => 19
+    case 16384 => 18
+    case 32768 => 17
+    case 65536 => 16
+    case 131072 => 15
+    case 262144 => 14
+    case 524288 => 13
+    case 1048576 => 12
+    case 2097152 => 11
+    case 4194304 => 10
+    case 8388608 => 9
+    case 16777216 => 8
+    case 33554432 => 7
+    case 67108864 => 6
+    case 134217728 => 5
+    case 268435456 => 4
+    case 536870912 => 3
+    case 1073741824 => 2
+  }
+
+
+  //////////////////////////////////////////////
+
+  /** Measures the De Bruijn multiplication + table lookup strategy. */
+  @Benchmark def with2DeBruijn(bh: Blackhole): Unit = {
+    for (v <- powersOfTwo) {
+      val leadingZeros = with2DeBruijn(v)
+      // assert (leadingZeros == withLoop(v), s"$leadingZeros != ${withLoop(v)} ($v)")
+      bh.consume(leadingZeros)
+    }
+  }
+
+  // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
+  /** Result table indexed by the top five bits of `v * 0x077CB531`. */
+  private val multiplyDeBruijnBitPosition2: Array[Int] = Array(32, 31, 4, 30, 3, 18, 8, 29, 2, 10, 12, 17, 7, 15, 28, 24, 1, 5, 19, 9, 11, 13, 16, 25, 6, 20, 14, 26, 21, 27, 22, 23)
+
+  /** Multiplies by the De Bruijn constant and uses the upper five bits of the product as table index. */
+  private def with2DeBruijn(v: Int): Int = multiplyDeBruijnBitPosition2((v * 0x077CB531) >>> 27)
+
+
+  //////////////////////////////////////////////
+
+  /** Measures the fully unrolled binary-search strategy. */
+  @Benchmark def withBinSearch(bh: Blackhole): Unit = {
+    for (v <- powersOfTwo) {
+      val leadingZeros = withBinSearch(v)
+      // assert (leadingZeros == withLoop(v), s"$leadingZeros != ${withLoop(v)} ($v)")
+      bh.consume(leadingZeros)
+    }
+  }
+
+  /**
+   * Hand-unrolled binary search over the powers of two.
+   * Every `else` binds to the nearest preceding unmatched `if`; the layout below mirrors that nesting.
+   */
+  private def withBinSearch(v: Int): Int =
+    if (v < 65536) if (v < 256) if (v < 16) if (v < 4) if (v == 1) 32 else 31
+                                            else if (v == 4) 30 else 29
+                                else if (v < 64) if (v == 16) 28 else 27
+                                     else if (v == 64) 26 else 25
+                   else if (v < 4096) if (v < 1024) if (v == 256) 24 else 23
+                                      else if (v == 1024) 22 else 21
+                        else if (v < 16384) if (v == 4096) 20 else 19
+                             else if (v == 16384) 18 else 17
+    else if (v < 16777216) if (v < 1048576) if (v < 262144) if (v == 65536) 16 else 15
+                                            else if (v == 262144) 14 else 13
+                           else if (v < 4194304) if (v == 1048576) 12 else 11
+                                else if (v == 4194304) 10 else 9
+         else if (v < 268435456) if (v < 67108864) if (v == 16777216) 8 else 7
+                                 else if (v == 67108864) 6 else 5
+              else if (v < 1073741824) if (v == 268435456) 4 else 3
+                   else if (v == 1073741824) 2 else 1
+
+  //////////////////////////////////////////////
+
+  /** Measures the branch-and-accumulate binary-search strategy. */
+  @Benchmark def withSumBinSearch(bh: Blackhole): Unit = {
+    for (v <- powersOfTwo) {
+      val leadingZeros = withSumBinSearch(v)
+      // assert(leadingZeros == withLoop(v), s"$leadingZeros != ${withLoop(v)} ($v)")
+      bh.consume(leadingZeros)
+    }
+  }
+
+  /**
+   * Halves the search window five times (16, 8, 4, 2, 1 bits); each time the
+   * remaining value is large enough it is shifted down and the window size is
+   * subtracted from the running result.
+   */
+  private def withSumBinSearch(v: Int): Int = {
+    var exponent = Integer.SIZE
+    var remaining = v
+    // exponent is still 32 here, so assigning 16 equals subtracting 16
+    if (remaining >= 65536) { remaining >>>= 16; exponent = 16 }
+    if (remaining >= 256) { remaining >>>= 8; exponent -= 8 }
+    if (remaining >= 16) { remaining >>>= 4; exponent -= 4 }
+    if (remaining >= 4) { remaining >>>= 2; exponent -= 2 }
+    if (remaining >= 2) exponent - 1 else exponent
+  }
+}
\ No newline at end of file