1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
package scala.util
import java.lang.Integer.{ rotateLeft => rotl }
/**
* An implementation of Austin Appleby's MurmurHash 3 algorithm
* (MurmurHash3_x86_32).
*
* An algorithm designed to generate well-distributed non-cryptographic
* hashes. It is designed to hash data in 32 bit chunks (ints).
*
* The mix method needs to be called at each step to update the intermediate
* hash value. For the last chunk to incorporate into the hash mixLast may
* be used instead, which is slightly faster. Finally finalizeHash needs to
* be called to compute the final hash value.
*
* This is based on the earlier MurmurHash3 code by Rex Kerr, but the
* MurmurHash3 algorithm was since changed by its creator Austin Appleby
* to remedy some weaknesses and improve performance. This represents the
* latest and supposedly final version of the algorithm (revision 136).
*
* @see http://code.google.com/p/smhasher
*/
object MurmurHash3 {
  // Default seeds, one per kind of hashed structure. The values are arbitrary.
  final val arraySeed       = 0x3c074a61
  final val stringSeed      = 0xf7ca7fd2
  final val productSeed     = 0xcafebabe
  final val symmetricSeed   = 0xb592f7ae
  final val traversableSeed = 0xe73a8b15

  /** Folds one 32-bit block of data into an intermediate hash value.
   *
   *  Equivalent to `mixLast` followed by an extra rotate/multiply/add step
   *  that prepares the hash for the next block.
   */
  final def mix(hash: Int, data: Int): Int =
    rotl(mixLast(hash, data), 13) * 5 + 0xe6546b64

  /** Folds the final block of data into an intermediate hash value.
   *
   *  Skips the trailing scramble performed by `mix`; that is sufficient for
   *  the last block because `finalizeHash` avalanches the whole value anyway.
   */
  final def mixLast(hash: Int, data: Int): Int = {
    val scrambled = rotl(data * 0xcc9e2d51, 15) * 0x1b873593
    hash ^ scrambled
  }

  /** Completes a hash: XORs in `length` and then avalanches all bits. */
  final def finalizeHash(hash: Int, length: Int): Int = avalanche(hash ^ length)

  /** Final bit mixer: three xor-shift steps interleaved with two multiplications. */
  private final def avalanche(hash: Int): Int = {
    val first  = (hash ^ (hash >>> 16)) * 0x85ebca6b
    val second = (first ^ (first >>> 13)) * 0xc2b2ae35
    second ^ (second >>> 16)
  }

  /** Hashes a product by mixing the `##` of each element in order.
   *
   *  A product of arity 0 hashes to the `hashCode` of its `productPrefix`
   *  (the seed is not used in that case), so case objects passed here still
   *  get a sensible result.
   */
  final def productHash(x: Product, seed: Int = productSeed): Int = {
    val arity = x.productArity
    if (arity == 0) x.productPrefix.hashCode
    else {
      var acc = seed
      var idx = 0
      while (idx < arity) {
        acc = mix(acc, x.productElement(idx).##)
        idx += 1
      }
      finalizeHash(acc, arity)
    }
  }

  /** Hashes a string two chars at a time.
   *
   *  Each pair is packed into one Int (first char in the upper 16 bits); an
   *  unpaired trailing char is mixed in with `mixLast`. The string length is
   *  used as the length for finalization.
   */
  final def stringHash(str: String, seed: Int = stringSeed): Int = {
    val len       = str.length
    val pairedEnd = len - (len % 2) // index just past the last complete char pair
    var acc = seed
    var pos = 0
    while (pos < pairedEnd) {
      acc = mix(acc, (str.charAt(pos) << 16) + str.charAt(pos + 1))
      pos += 2
    }
    if (pairedEnd < len) acc = mixLast(acc, str.charAt(pairedEnd))
    finalizeHash(acc, len)
  }

  /** Computes an order-independent hash of the elements of `xs`, e.g. for sets.
   *
   *  Three commutative accumulators are gathered over the element hashes
   *  (sum, xor, and the product of the non-zero ones) and then mixed with the
   *  seed; the element count is used as the length for finalization.
   */
  final def symmetricHash[T](xs: collection.GenTraversableOnce[T], seed: Int = symmetricSeed): Int = {
    var sum     = 0
    var xored   = 0
    var product = 1
    var count   = 0
    xs.seq.foreach { elem =>
      val elemHash = elem.##
      sum += elemHash
      xored ^= elemHash
      // Zero hashes are skipped so a single one cannot wipe out the product.
      if (elemHash != 0) product *= elemHash
      count += 1
    }
    finalizeHash(mixLast(mix(mix(seed, sum), xored), product), count)
  }

  /** Hashes the elements of `xs` in traversal order; the element count is the length. */
  final def traversableHash[T](xs: collection.GenTraversableOnce[T], seed: Int = traversableSeed): Int = {
    var acc   = seed
    var count = 0
    xs.seq.foreach { elem =>
      acc = mix(acc, elem.##)
      count += 1
    }
    finalizeHash(acc, count)
  }

  /** Hashes an array by mixing the `##` of every element in index order. */
  final def arrayHash[@specialized T](a: Array[T], seed: Int = arraySeed): Int = {
    val len = a.length
    var acc = seed
    var idx = 0
    while (idx < len) {
      acc = mix(acc, a(idx).##)
      idx += 1
    }
    finalizeHash(acc, len)
  }

  /** Hashes a byte array four bytes at a time.
   *
   *  Complete 4-byte groups are assembled with the lowest-indexed byte in the
   *  least significant position and mixed with `mix`; the 1 to 3 leftover
   *  bytes, if any, are assembled the same way and mixed with `mixLast`.
   *  Every element of `arrayHash` is mixed individually, whereas here bytes
   *  are packed first.
   */
  final def bytesHash(data: Array[Byte], seed: Int = arraySeed): Int = {
    val total    = data.length
    val blockEnd = total & ~3 // index just past the last complete 4-byte group
    var acc = seed

    // Body: complete 4-byte groups.
    var pos = 0
    while (pos < blockEnd) {
      val block =
        (data(pos) & 0xFF) |
          ((data(pos + 1) & 0xFF) << 8) |
          ((data(pos + 2) & 0xFF) << 16) |
          ((data(pos + 3) & 0xFF) << 24)
      acc = mix(acc, block)
      pos += 4
    }

    // Tail: the remaining 1..3 bytes, if any.
    val remaining = total - blockEnd
    if (remaining >= 1) {
      var tail = data(pos) & 0xFF
      if (remaining >= 2) tail ^= (data(pos + 1) & 0xFF) << 8
      if (remaining == 3) tail ^= (data(pos + 2) & 0xFF) << 16
      acc = mixLast(acc, tail)
    }

    // Finalization
    finalizeHash(acc, total)
  }
}
|