summaryrefslogtreecommitdiff
path: root/test/benchmarks
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2011-09-08 05:43:32 +0000
committerPaul Phillips <paulp@improving.org>2011-09-08 05:43:32 +0000
commit52c1d019d63d2fffadc8f3f159d654187ac61ece (patch)
tree6958700cb96b574b0894b3792f76be68d2a7161d /test/benchmarks
parentc37e8f45cf86d0a3f6cb084e85f9751f5e906bf2 (diff)
downloadscala-52c1d019d63d2fffadc8f3f159d654187ac61ece.tar.gz
scala-52c1d019d63d2fffadc8f3f159d654187ac61ece.tar.bz2
scala-52c1d019d63d2fffadc8f3f159d654187ac61ece.zip
Refinement of murmurhash implementation.
Integrates recent speed improvements to algorithm. Contributed by Ruediger Keller, no review.
Diffstat (limited to 'test/benchmarks')
-rw-r--r--test/benchmarks/src/scala/util/HashSpeedTest.scala148
1 files changed, 80 insertions, 68 deletions
diff --git a/test/benchmarks/src/scala/util/HashSpeedTest.scala b/test/benchmarks/src/scala/util/HashSpeedTest.scala
index 615bd47aa2..5f6915e4fc 100644
--- a/test/benchmarks/src/scala/util/HashSpeedTest.scala
+++ b/test/benchmarks/src/scala/util/HashSpeedTest.scala
@@ -1,65 +1,71 @@
object HashSpeedTest {
- import System.{nanoTime => now}
+
+ import System.{ nanoTime => now }
def time[A](f: => A) = {
val t0 = now
val ans = f
(ans, now - t0)
}
+
def ptime[A](f: => A) = {
- val (ans,dt) = time(f)
- printf("Elapsed: %.3f\n",dt*1e-9)
+ val (ans, dt) = time(f)
+ printf("Elapsed: %.3f\n", dt * 1e-9)
ans
}
object HashHist {
var enabled = true
- val counts = new collection.mutable.HashMap[Int,Int]
- def add (i: Int) { if (enabled) counts(i) = counts.get(i).getOrElse(0)+1 }
+ val counts = new collection.mutable.HashMap[Int, Int]
+ def add(i: Int) { if (enabled) counts(i) = counts.get(i).getOrElse(0) + 1 }
def resultAndReset = {
var s = 0L
var o = 0L
var m = 0
counts.valuesIterator.foreach(i => {
s += i
- if (i>0) o += 1
- if (i>m) m = i
+ if (i > 0) o += 1
+ if (i > m) m = i
})
counts.clear
- (s,o,m)
+ (s, o, m)
}
}
- def report(s: String,res: (Long,Long,Int)) {
- println("Hash quality of "+s)
- printf(" %5.2f%% of entries are collisions\n",100*(res._1 - res._2).toDouble/res._1)
- printf(" Max of %d entries mapped to the same value\n",res._3)
+ def report(s: String, res: (Long, Long, Int)) {
+ println("Hash quality of " + s)
+ printf(" %5.2f%% of entries are collisions\n", 100 * (res._1 - res._2).toDouble / res._1)
+ printf(" Max of %d entries mapped to the same value\n", res._3)
}
// If you have MurmurHash3 installed, uncomment below (and in main)
- import scala.util.{MurmurHash3 => MH3}
+ import scala.util.{ MurmurHash3 => MH3 }
+
val justCountString: String => Unit = str => {
- var s,i = 0
+ var s, i = 0
while (i < str.length) { s += str.charAt(i); i += 1 }
HashHist.add(s)
}
+
val defaultHashString: String => Unit = str => HashHist.add(str.hashCode)
+
val murmurHashString: String => Unit = str => HashHist.add(MH3.stringHash(str))
+
def makeCharStrings = {
val a = new Array[Byte](4)
val buffer = new collection.mutable.ArrayBuffer[String]
var i: Int = 'A'
while (i <= 'Z') {
- a(0) = (i&0xFF).toByte
+ a(0) = (i & 0xFF).toByte
var j: Int = 'a'
while (j <= 'z') {
- a(1) = (j&0xFF).toByte
+ a(1) = (j & 0xFF).toByte
var k: Int = 'A'
while (k <= 'z') {
- a(2) = (k&0xFF).toByte
+ a(2) = (k & 0xFF).toByte
var l: Int = 'A'
while (l <= 'z') {
- a(3) = (l&0xFF).toByte
+ a(3) = (l & 0xFF).toByte
buffer += new String(a)
l += 1
}
@@ -71,6 +77,7 @@ object HashSpeedTest {
}
buffer.toArray
}
+
def hashCharStrings(ss: Array[String], hash: String => Unit) {
var i = 0
while (i < ss.length) {
@@ -84,13 +91,15 @@ object HashSpeedTest {
lli.foreach(_.foreach(s += _))
HashHist.add(s)
}
+
def defaultHashList: List[List[Int]] => Unit = lli => HashHist.add(lli.hashCode)
+
def makeBinaryLists = {
def singleLists(depth: Int): List[List[Int]] = {
if (depth <= 0) List(Nil)
else {
- val set = singleLists(depth-1)
- val longest = set filter (_.length == depth-1)
+ val set = singleLists(depth - 1)
+ val longest = set filter (_.length == depth - 1)
set ::: (longest.map(0 :: _)) ::: (longest.map(1 :: _))
}
}
@@ -123,6 +132,7 @@ object HashSpeedTest {
}
buffer.toArray
}
+
def hashBinaryLists(ls: Array[List[List[Int]]], hash: List[List[Int]] => Unit) {
var i = 0
while (i < ls.length) {
@@ -136,17 +146,20 @@ object HashSpeedTest {
si.foreach(s += _)
HashHist.add(s)
}
+
def defaultHashSets: Set[Int] => Unit = si => HashHist.add(si.hashCode)
+
def makeIntSets = {
def sets(depth: Int): List[Set[Int]] = {
if (depth <= 0) List(Set.empty[Int])
else {
- val set = sets(depth-1)
+ val set = sets(depth - 1)
set ::: set.map(_ + depth)
}
}
sets(20).toArray
}
+
def hashIntSets(ss: Array[Set[Int]], hash: Set[Int] => Unit) {
var i = 0
while (i < ss.length) {
@@ -156,36 +169,37 @@ object HashSpeedTest {
}
def defaultHashTuples: (Product with Serializable) => Unit = p => HashHist.add(p.hashCode)
+
def makeNestedTuples = {
val basic = Array(
- (0,0),
- (0,1),
- (1,0),
- (1,1),
- (0,0,0),
- (0,0,1),
- (0,1,0),
- (1,0,0),
- (0,0,0,0),
- (0,0,0,0,0),
- (false,false),
- (true,false),
- (false,true),
- (true,true),
- (0.7,true,"fish"),
- ((),true,'c',400,9.2,"galactic")
- )
+ (0, 0),
+ (0, 1),
+ (1, 0),
+ (1, 1),
+ (0, 0, 0),
+ (0, 0, 1),
+ (0, 1, 0),
+ (1, 0, 0),
+ (0, 0, 0, 0),
+ (0, 0, 0, 0, 0),
+ (false, false),
+ (true, false),
+ (false, true),
+ (true, true),
+ (0.7, true, "fish"),
+ ((), true, 'c', 400, 9.2, "galactic"))
basic ++
- (for (i <- basic; j <- basic) yield (i,j)) ++
- (for (i <- basic; j <- basic; k <- basic) yield (i,j,k)) ++
- (for (i <- basic; j <- basic; k <- basic) yield ((i,j),k)) ++
- (for (i <- basic; j <- basic; k <- basic) yield (i,(j,k))) ++
- (for (i <- basic; j <- basic; k <- basic; l <- basic) yield (i,j,k,l)) ++
- (for (i <- basic; j <- basic; k <- basic; l <- basic) yield ((i,j),(k,l))) ++
- (for (i <- basic; j <- basic; k <- basic; l <- basic) yield (i,(j,k,l))) ++
- (for (i <- basic; j <- basic; k <- basic; l <- basic; m <- basic) yield (i,j,k,l,m)) ++
- (for (i <- basic; j <- basic; k <- basic; l <- basic; m <- basic) yield (i,(j,(k,(l,m)))))
+ (for (i <- basic; j <- basic) yield (i, j)) ++
+ (for (i <- basic; j <- basic; k <- basic) yield (i, j, k)) ++
+ (for (i <- basic; j <- basic; k <- basic) yield ((i, j), k)) ++
+ (for (i <- basic; j <- basic; k <- basic) yield (i, (j, k))) ++
+ (for (i <- basic; j <- basic; k <- basic; l <- basic) yield (i, j, k, l)) ++
+ (for (i <- basic; j <- basic; k <- basic; l <- basic) yield ((i, j), (k, l))) ++
+ (for (i <- basic; j <- basic; k <- basic; l <- basic) yield (i, (j, k, l))) ++
+ (for (i <- basic; j <- basic; k <- basic; l <- basic; m <- basic) yield (i, j, k, l, m)) ++
+ (for (i <- basic; j <- basic; k <- basic; l <- basic; m <- basic) yield (i, (j, (k, (l, m)))))
}
+
def hashNestedTuples(ts: Array[Product with Serializable], hash: (Product with Serializable) => Unit) {
var i = 0
while (i < ts.length) {
@@ -194,21 +208,21 @@ object HashSpeedTest {
}
}
- def findSpeed[A](n: Int, h: (Array[A],A=>Unit)=>Unit, aa: Array[A], f: A=>Unit) = {
- (time { for (i <- 1 to n) { h(aa,f) } }._2, aa.length.toLong*n)
+ def findSpeed[A](n: Int, h: (Array[A], A => Unit) => Unit, aa: Array[A], f: A => Unit) = {
+ (time { for (i <- 1 to n) { h(aa, f) } }._2, aa.length.toLong * n)
}
- def reportSpeed[A](repeats: Int, xs: List[(String, ()=>(Long,Long))]) {
- val tn = Array.fill(xs.length)((0L,0L))
+ def reportSpeed[A](repeats: Int, xs: List[(String, () => (Long, Long))]) {
+ val tn = Array.fill(xs.length)((0L, 0L))
for (j <- 1 to repeats) {
- for ((l,i) <- xs zipWithIndex) {
+ for ((l, i) <- xs zipWithIndex) {
val x = l._2()
tn(i) = (tn(i)._1 + x._1, tn(i)._2 + x._2)
}
}
- for (((t,n),(title,_)) <- (tn zip xs)) {
- val rate = (n*1e-6)/(t*1e-9)
- printf("Hash rate for %s: %4.2f million/second\n",title,rate)
+ for (((t, n), (title, _)) <- (tn zip xs)) {
+ val rate = (n * 1e-6) / (t * 1e-9)
+ printf("Hash rate for %s: %4.2f million/second\n", title, rate)
}
}
@@ -218,24 +232,22 @@ object HashSpeedTest {
val nt = makeNestedTuples
// Uncomment the following for string stats if MurmurHash3 available
val cs = makeCharStrings
- report("Java String hash for strings",{ hashCharStrings(cs,defaultHashString); HashHist.resultAndReset })
- report("MurmurHash3 for strings",{ hashCharStrings(cs,murmurHashString); HashHist.resultAndReset })
+ report("Java String hash for strings", { hashCharStrings(cs, defaultHashString); HashHist.resultAndReset })
+ report("MurmurHash3 for strings", { hashCharStrings(cs, murmurHashString); HashHist.resultAndReset })
HashHist.enabled = false
reportSpeed(3, List(
- ("Java string hash", () => findSpeed[String](30, (x, y) => hashCharStrings(x, y),cs,defaultHashString)),
- ("MurmurHash3 string hash", () => findSpeed[String](30,(x, y) => hashCharStrings(x, y),cs,murmurHashString))
- ))
+ ("Java string hash", () => findSpeed[String](30, (x, y) => hashCharStrings(x, y), cs, defaultHashString)),
+ ("MurmurHash3 string hash", () => findSpeed[String](30, (x, y) => hashCharStrings(x, y), cs, murmurHashString))))
// reportSpeed("Java string hash",30,hashCharStrings.tupled,cs,defaultHashString)
// reportSpeed("MurmurHash3 string hash",30,hashCharStrings.tupled,cs,murmurHashString)
HashHist.enabled = true
- report("lists of binary int lists",{ hashBinaryLists(bl,defaultHashList); HashHist.resultAndReset })
- report("small integer sets",{ hashIntSets(is,defaultHashSets); HashHist.resultAndReset })
- report("small nested tuples",{ hashNestedTuples(nt,defaultHashTuples); HashHist.resultAndReset })
+ report("lists of binary int lists", { hashBinaryLists(bl, defaultHashList); HashHist.resultAndReset })
+ report("small integer sets", { hashIntSets(is, defaultHashSets); HashHist.resultAndReset })
+ report("small nested tuples", { hashNestedTuples(nt, defaultHashTuples); HashHist.resultAndReset })
HashHist.enabled = false
- reportSpeed(3,List(
- ("lists of lists of binary ints", () => findSpeed(20,hashBinaryLists,bl,defaultHashList)),
- ("small integer sets", () => findSpeed(10,hashIntSets,is,defaultHashSets)),
- ("small nested tuples", () => findSpeed(5,hashNestedTuples,nt,defaultHashTuples))
- ))
+ reportSpeed(3, List(
+ ("lists of lists of binary ints", () => findSpeed(20, hashBinaryLists, bl, defaultHashList)),
+ ("small integer sets", () => findSpeed(10, hashIntSets, is, defaultHashSets)),
+ ("small nested tuples", () => findSpeed(5, hashNestedTuples, nt, defaultHashTuples))))
}
}