aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorPetko Nikolov <nikolov@soundcloud.com>2014-03-27 15:49:07 -0700
committerReynold Xin <rxin@apache.org>2014-03-27 15:49:07 -0700
commit6f986f0b87bd03f4df2bf6c917e61241e9b14ac2 (patch)
tree36117e0f0f046475f2d3496aab0576612d2b14a8 /core
parent53953d0933c0a7c3bd3bc1003954426363912e4b (diff)
downloadspark-6f986f0b87bd03f4df2bf6c917e61241e9b14ac2.tar.gz
spark-6f986f0b87bd03f4df2bf6c917e61241e9b14ac2.tar.bz2
spark-6f986f0b87bd03f4df2bf6c917e61241e9b14ac2.zip
[SPARK-1268] Adding XOR and AND-NOT operations to spark.util.collection.BitSet
Symmetric difference (xor) in particular is useful for computing some distance metrics (e.g. Hamming). Unit tests added. Author: Petko Nikolov <nikolov@soundcloud.com> Closes #172 from petko-nikolov/bitset-imprv and squashes the following commits: 451f28b [Petko Nikolov] fixed style mistakes 5beba18 [Petko Nikolov] rm outer loop in andNot test 0e61035 [Petko Nikolov] conform to spark style; rm redundant asserts; more unit tests added; use arraycopy instead of loop d53cdb9 [Petko Nikolov] rm incidentally added space 4e1df43 [Petko Nikolov] adding xor and and-not to BitSet; unit tests added
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/util/collection/BitSet.scala39
-rw-r--r--core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala83
2 files changed, 122 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
index d3153d2cac..af1f64649f 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
@@ -89,6 +89,45 @@ class BitSet(numBits: Int) extends Serializable {
}
/**
+ * Compute the symmetric difference by performing bit-wise XOR of the two sets returning the
+ * result.
+ */
+ def ^(other: BitSet): BitSet = {
+ val newBS = new BitSet(math.max(capacity, other.capacity))
+ val smaller = math.min(numWords, other.numWords)
+ var ind = 0
+ while (ind < smaller) {
+ newBS.words(ind) = words(ind) ^ other.words(ind)
+ ind += 1
+ }
+ if (ind < numWords) {
+ Array.copy( words, ind, newBS.words, ind, numWords - ind )
+ }
+ if (ind < other.numWords) {
+ Array.copy( other.words, ind, newBS.words, ind, other.numWords - ind )
+ }
+ newBS
+ }
+
+ /**
+ * Compute the difference of the two sets by performing bit-wise AND-NOT returning the
+ * result.
+ */
+ def andNot(other: BitSet): BitSet = {
+ val newBS = new BitSet(capacity)
+ val smaller = math.min(numWords, other.numWords)
+ var ind = 0
+ while (ind < smaller) {
+ newBS.words(ind) = words(ind) & ~other.words(ind)
+ ind += 1
+ }
+ if (ind < numWords) {
+ Array.copy( words, ind, newBS.words, ind, numWords - ind )
+ }
+ newBS
+ }
+
+ /**
* Sets the bit at the specified index to true.
* @param index the bit index
*/
diff --git a/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala
index c32183c134..b85a409a4b 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala
@@ -69,4 +69,87 @@ class BitSetSuite extends FunSuite {
assert(bitset.nextSetBit(96) === 96)
assert(bitset.nextSetBit(97) === -1)
}
+
+ test( "xor len(bitsetX) < len(bitsetY)" ) {
+ val setBitsX = Seq( 0, 2, 3, 37, 41 )
+ val setBitsY = Seq( 0, 1, 3, 37, 38, 41, 85)
+ val bitsetX = new BitSet(60)
+ setBitsX.foreach( i => bitsetX.set(i))
+ val bitsetY = new BitSet(100)
+ setBitsY.foreach( i => bitsetY.set(i))
+
+ val bitsetXor = bitsetX ^ bitsetY
+
+ assert(bitsetXor.nextSetBit(0) === 1)
+ assert(bitsetXor.nextSetBit(1) === 1)
+ assert(bitsetXor.nextSetBit(2) === 2)
+ assert(bitsetXor.nextSetBit(3) === 38)
+ assert(bitsetXor.nextSetBit(38) === 38)
+ assert(bitsetXor.nextSetBit(39) === 85)
+ assert(bitsetXor.nextSetBit(42) === 85)
+ assert(bitsetXor.nextSetBit(85) === 85)
+ assert(bitsetXor.nextSetBit(86) === -1)
+
+ }
+
+ test( "xor len(bitsetX) > len(bitsetY)" ) {
+ val setBitsX = Seq( 0, 1, 3, 37, 38, 41, 85)
+ val setBitsY = Seq( 0, 2, 3, 37, 41 )
+ val bitsetX = new BitSet(100)
+ setBitsX.foreach( i => bitsetX.set(i))
+ val bitsetY = new BitSet(60)
+ setBitsY.foreach( i => bitsetY.set(i))
+
+ val bitsetXor = bitsetX ^ bitsetY
+
+ assert(bitsetXor.nextSetBit(0) === 1)
+ assert(bitsetXor.nextSetBit(1) === 1)
+ assert(bitsetXor.nextSetBit(2) === 2)
+ assert(bitsetXor.nextSetBit(3) === 38)
+ assert(bitsetXor.nextSetBit(38) === 38)
+ assert(bitsetXor.nextSetBit(39) === 85)
+ assert(bitsetXor.nextSetBit(42) === 85)
+ assert(bitsetXor.nextSetBit(85) === 85)
+ assert(bitsetXor.nextSetBit(86) === -1)
+
+ }
+
+ test( "andNot len(bitsetX) < len(bitsetY)" ) {
+ val setBitsX = Seq( 0, 2, 3, 37, 41, 48 )
+ val setBitsY = Seq( 0, 1, 3, 37, 38, 41, 85)
+ val bitsetX = new BitSet(60)
+ setBitsX.foreach( i => bitsetX.set(i))
+ val bitsetY = new BitSet(100)
+ setBitsY.foreach( i => bitsetY.set(i))
+
+ val bitsetDiff = bitsetX.andNot( bitsetY )
+
+ assert(bitsetDiff.nextSetBit(0) === 2)
+ assert(bitsetDiff.nextSetBit(1) === 2)
+ assert(bitsetDiff.nextSetBit(2) === 2)
+ assert(bitsetDiff.nextSetBit(3) === 48)
+ assert(bitsetDiff.nextSetBit(48) === 48)
+ assert(bitsetDiff.nextSetBit(49) === -1)
+ assert(bitsetDiff.nextSetBit(65) === -1)
+ }
+
+ test( "andNot len(bitsetX) > len(bitsetY)" ) {
+ val setBitsX = Seq( 0, 1, 3, 37, 38, 41, 85)
+ val setBitsY = Seq( 0, 2, 3, 37, 41, 48 )
+ val bitsetX = new BitSet(100)
+ setBitsX.foreach( i => bitsetX.set(i))
+ val bitsetY = new BitSet(60)
+ setBitsY.foreach( i => bitsetY.set(i))
+
+ val bitsetDiff = bitsetX.andNot( bitsetY )
+
+ assert(bitsetDiff.nextSetBit(0) === 1)
+ assert(bitsetDiff.nextSetBit(1) === 1)
+ assert(bitsetDiff.nextSetBit(2) === 38)
+ assert(bitsetDiff.nextSetBit(3) === 38)
+ assert(bitsetDiff.nextSetBit(38) === 38)
+ assert(bitsetDiff.nextSetBit(39) === 85)
+ assert(bitsetDiff.nextSetBit(85) === 85)
+ assert(bitsetDiff.nextSetBit(86) === -1)
+ }
}