summaryrefslogtreecommitdiff
path: root/test/files/run/t6253a.scala
diff options
context:
space:
mode:
authorRĂ¼diger Klaehn <rklaehn@gmail.com>2014-01-16 21:39:56 +0100
committerRĂ¼diger Klaehn <rklaehn@gmail.com>2014-01-16 21:39:56 +0100
commit034f6b9452265dcd0e7493ed29971c8864b95c35 (patch)
tree5b584d8c4e982be09a96567b471062dc4dd9f049 /test/files/run/t6253a.scala
parentdddf1f5a79b1f00b6b173439f3df9e9d2f82af3b (diff)
downloadscala-034f6b9452265dcd0e7493ed29971c8864b95c35.tar.gz
scala-034f6b9452265dcd0e7493ed29971c8864b95c35.tar.bz2
scala-034f6b9452265dcd0e7493ed29971c8864b95c35.zip
SI-6253 HashSet should implement union
Implements of HashSet.union that reuses the two trees as much as possible when calculating the union of two sets. This leads to significant performance improvements as well as to much better structural sharing. There is a comprehensive correctness test for union since there was not a single test for HashSet.union before. In addition, there are some tests of the desirable properties of the new implementation (structural sharing and efficiency regarding calls of key.hashCode). The other operations diff and intersect, which are conceptually very similar to union, are also implemented along with comprehensive test cases for both correctness and structural sharing. Note that while it appears that there is some code duplication between the three methods, they are sufficiently different that it is not possible to merge them into one without sacrificing performance.
Diffstat (limited to 'test/files/run/t6253a.scala')
-rw-r--r--test/files/run/t6253a.scala64
1 files changed, 64 insertions, 0 deletions
diff --git a/test/files/run/t6253a.scala b/test/files/run/t6253a.scala
new file mode 100644
index 0000000000..efa3230df6
--- /dev/null
+++ b/test/files/run/t6253a.scala
@@ -0,0 +1,64 @@
+import scala.collection.immutable.HashSet
+
+object Test extends App {
+
+ var hashCount = 0
+
+ /**
+ * A key that produces lots of hash collisions, to exercise the part of the code that deals with those
+ */
+ case class Collision(value: Int) {
+
+ override def hashCode = {
+ // we do not check hash counts for Collision keys because ListSet.++ uses a mutable hashset internally,
+ // so when we have hash collisions, union will call key.hashCode.
+ // hashCount += 1
+ value / 5
+ }
+ }
+
+ /**
+ * A key that is identical to int other than that it counts hashCode invocations
+ */
+ case class HashCounter(value: Int) {
+
+ override def hashCode = {
+ hashCount += 1
+ value
+ }
+ }
+
+ def testUnion[T](sizes: Seq[Int], offsets: Seq[Double], keyType: String, mkKey: Int => T) {
+ for {
+ i <- sizes
+ o <- offsets
+ } {
+ val e = HashSet.empty[T]
+ val j = (i * o).toInt
+ // create two sets of size i with overlap o
+ val a = e ++ (0 until i).map(mkKey)
+ require(a.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val b = e ++ (j until (i + j)).map(mkKey)
+ require(b.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val as = e ++ (0 until j).map(mkKey)
+ require(as.size == j, s"Building HashSet of size $j failed. Key type $keyType.")
+ val hashCount0 = hashCount
+ val u = a union b
+ require(hashCount == hashCount0, s"key.hashCode should not be called, but has been called ${hashCount - hashCount0} times. Key type $keyType.")
+ require(u == (a union scala.collection.mutable.HashSet(b.toSeq: _*)), s"Operation must still work for other sets!")
+ require(u.size == i + j, s"Expected size ${i+j}. Real size ${u.size}. Key type $keyType.")
+ for (x <- 0 until i + j)
+ require(u.contains(mkKey(x)), s"Key type $keyType. Set (0 until ${i + j}) should contain $x but does not.")
+ val a_as = a union as
+ val as_a = as union a
+ require((a_as eq a) || (a_as eq as), s"No structural sharing in a union as. Key type $keyType, a=(0 until $i) as=(0 until $j)")
+ require((as_a eq a) || (as_a eq as), s"No structural sharing in as union a. Key type $keyType, a=(0 until $i) as=(0 until $j)")
+ }
+ }
+
+ val sizes = Seq(1, 10, 100, 1000, 10000, 100000)
+ val offsets = Seq(0.0, 0.25, 0.5, 0.75, 1.0)
+ testUnion(sizes, offsets, "int", identity[Int])
+ testUnion(sizes, offsets, "hashcounter", HashCounter.apply)
+ testUnion(sizes, offsets, "collision", Collision.apply)
+}