summaryrefslogtreecommitdiff
path: root/src/test/scala/spray/json/HashCodeCollider.scala
diff options
context:
space:
mode:
authorJohannes Rudolph <johannes.rudolph@gmail.com>2018-10-30 13:52:29 +0100
committerJohannes Rudolph <johannes.rudolph@gmail.com>2018-11-07 14:51:49 +0100
commit855b35e6d65079085d580ab3063637d94c8f3e0a (patch)
treec703ec80d7544171cef23c92d0bbddd85ba7ea78 /src/test/scala/spray/json/HashCodeCollider.scala
parentcaee7a06b554ca11d95bda2fd9302e4b3b31ef9c (diff)
downloadspray-json-855b35e6d65079085d580ab3063637d94c8f3e0a.tar.gz
spray-json-855b35e6d65079085d580ab3063637d94c8f3e0a.tar.bz2
spray-json-855b35e6d65079085d580ab3063637d94c8f3e0a.zip
CVE-2018-18854 Use TreeMap instead of HashMap for JsObject key/value pairs, fixes #277
The problem is that with String's hashCode implementation it is too simple to create synthetic collisions. This allows an attacker to create an object with keys that all collide which leads to a performance drop for the HashMap just for creating the map in the first place.

See https://github.com/scala/bug/issues/11203 for more information about the underlying HashMap issue.

For the time being, it seems safer to use a TreeMap which uses String ordering. Benchmarks suggest that using a TreeMap is only ~6% slower for reasonably sized JSON objects up to 100 keys.

Benchmark for non-colliding keys:

Benchmark                         (_size)  (parser)   Mode  Cnt        Score       Error  Units
ExtractFieldsBenchmark.readSpray        1   HashMap  thrpt    5  1195832.262 ± 64366.605  ops/s
ExtractFieldsBenchmark.readSpray        1   TreeMap  thrpt    5  1342009.641 ± 17307.555  ops/s
ExtractFieldsBenchmark.readSpray       10   HashMap  thrpt    5   237173.327 ± 70341.742  ops/s
ExtractFieldsBenchmark.readSpray       10   TreeMap  thrpt    5   233510.618 ± 69638.750  ops/s
ExtractFieldsBenchmark.readSpray      100   HashMap  thrpt    5    23202.016 ±  1514.763  ops/s
ExtractFieldsBenchmark.readSpray      100   TreeMap  thrpt    5    21899.072 ±   823.225  ops/s
ExtractFieldsBenchmark.readSpray     1000   HashMap  thrpt    5     2073.754 ±    66.093  ops/s
ExtractFieldsBenchmark.readSpray     1000   TreeMap  thrpt    5     1793.329 ±    43.603  ops/s
ExtractFieldsBenchmark.readSpray    10000   HashMap  thrpt    5      208.160 ±     7.466  ops/s
ExtractFieldsBenchmark.readSpray    10000   TreeMap  thrpt    5      160.349 ±     5.809  ops/s
Diffstat (limited to 'src/test/scala/spray/json/HashCodeCollider.scala')
-rw-r--r--src/test/scala/spray/json/HashCodeCollider.scala26
1 files changed, 26 insertions, 0 deletions
diff --git a/src/test/scala/spray/json/HashCodeCollider.scala b/src/test/scala/spray/json/HashCodeCollider.scala
new file mode 100644
index 0000000..57388b9
--- /dev/null
+++ b/src/test/scala/spray/json/HashCodeCollider.scala
@@ -0,0 +1,26 @@
+package spray.json
+
+/**
+ * Helper that lazily enumerates distinct 8-character strings that all share the same hashCode == 0, by building String.hashCode (s(0)*31^7 + ... + s(7), with Int wrap-around) one character at a time.
+ *
+ * Adapted from MIT-licensed code by Andriy Plokhotnyuk
+ * at https://github.com/plokhotnyuk/jsoniter-scala/blob/26b5ecdd4f8c2ab7e97bd8106cefdda4c1e701ce/jsoniter-scala-benchmark/src/main/scala/com/github/plokhotnyuk/jsoniter_scala/macros/HashCodeCollider.scala#L6.
+ */
+object HashCodeCollider {
+ val visibleChars = (33 until 127).filterNot(c => c == '\\' || c == '"') // character codes 33..126 minus backslash and double quote (presumably so the strings need no JSON escaping -- confirm)
+ def asciiChars: Iterator[Int] = visibleChars.toIterator // fresh iterator over visibleChars on every call; not referenced elsewhere in this object
+ def asciiCharsAndHash(previousHash: Int): Iterator[(Int, Int)] = visibleChars.toIterator.map(c => c -> (previousHash + c) * 31) // pairs each char code with the running hash already multiplied by 31 for the next position
+
+ /** Creates an iterator of 8-character Strings that all have hashCode == 0; the guards on i2..i6 only prune prefixes, the exact check is the guard on i7. */
+ def zeroHashCodeIterator(): Iterator[String] =
+ for {
+ (i0, h0) <- asciiCharsAndHash(0) // first char: running hash starts from 0
+ (i1, h1) <- asciiCharsAndHash(h0)
+ (i2, h2) <- asciiCharsAndHash(h1) if (((h2 + 32) * 923521) ^ ((h2 + 127) * 923521)) < 0 // 923521 == 31^4; keep the prefix only if the scaled values for next-char bounds 32 and 127 differ in sign (XOR < 0). NOTE(review): looks like a pruning heuristic rather than an exact test -- confirm
+ (i3, h3) <- asciiCharsAndHash(h2) if (((h3 + 32) * 29791) ^ ((h3 + 127) * 29791)) < 0 // same sign-change pruning with 29791 == 31^3
+ (i4, h4) <- asciiCharsAndHash(h3) if (((h4 + 32) * 961) ^ ((h4 + 127) * 961)) < 0 // same sign-change pruning with 961 == 31^2
+ (i5, h5) <- asciiCharsAndHash(h4) if (((h5 + 32) * 31) ^ ((h5 + 127) * 31)) < 0 // same sign-change pruning with 31
+ (i6, h6) <- asciiCharsAndHash(h5) if ((h6 + 32) ^ (h6 + 127)) < 0 // only one char remains: h6 + c can be 0 for a c in 33..126 only if h6 + 32 and h6 + 127 differ in sign
+ (i7, h7) <- asciiCharsAndHash(h6) if h6 + i7 == 0 // exact test: h6 + i7 is the hashCode of the full 8-char string; h7 is computed but unused
+ } yield new String(Array(i0, i1, i2, i3, i4, i5, i6, i7).map(_.toChar)) // turn the eight char codes into the resulting String
+}