aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorDavid Navas <davidn@clearstorydata.com>2016-09-17 16:22:23 +0100
committerSean Owen <sowen@cloudera.com>2016-09-17 16:22:23 +0100
commit9dbd4b864efacd09a8353d00c998be87f9eeacb2 (patch)
treef06c228df6d6582fc3466f0444b572cc1c875dfb /core
parent25cbbe6ca334140204e7035ab8b9d304da9b8a8a (diff)
downloadspark-9dbd4b864efacd09a8353d00c998be87f9eeacb2.tar.gz
spark-9dbd4b864efacd09a8353d00c998be87f9eeacb2.tar.bz2
spark-9dbd4b864efacd09a8353d00c998be87f9eeacb2.zip
[SPARK-17529][CORE] Implement BitSet.clearUntil and use it during merge joins
## What changes were proposed in this pull request? Add a clearUntil() method on BitSet (adapted from the pre-existing setUntil() method). Use this method to clear the subset of the BitSet which needs to be used during merge joins. ## How was this patch tested? dev/run-tests, as well as performance tests on skewed data as described in jira. I expect there to be a small local performance hit using BitSet.clearUntil rather than BitSet.clear for normally shaped (unskewed) joins (additional read on the last long). This is expected to be de-minimis and was not specifically tested. Author: David Navas <davidn@clearstorydata.com> Closes #15084 from davidnavas/bitSet.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/util/collection/BitSet.scala28
-rw-r--r--core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala32
2 files changed, 50 insertions, 10 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
index 7ab67fc3a2..e63e0e3e1f 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala
@@ -17,6 +17,8 @@
package org.apache.spark.util.collection
+import java.util.Arrays
+
/**
* A simple, fixed-size bit set implementation. This implementation is fast because it avoids
* safety/bound checking.
@@ -35,21 +37,14 @@ class BitSet(numBits: Int) extends Serializable {
/**
* Clear all set bits.
*/
- def clear(): Unit = {
- var i = 0
- while (i < numWords) {
- words(i) = 0L
- i += 1
- }
- }
+ def clear(): Unit = Arrays.fill(words, 0)
/**
* Set all the bits up to a given index
*/
- def setUntil(bitIndex: Int) {
+ def setUntil(bitIndex: Int): Unit = {
val wordIndex = bitIndex >> 6 // divide by 64
- var i = 0
- while(i < wordIndex) { words(i) = -1; i += 1 }
+ Arrays.fill(words, 0, wordIndex, -1)
if(wordIndex < words.length) {
// Set the remaining bits (note that the mask could still be zero)
val mask = ~(-1L << (bitIndex & 0x3f))
@@ -58,6 +53,19 @@ class BitSet(numBits: Int) extends Serializable {
}
/**
+ * Clear all the bits up to a given index
+ */
+ def clearUntil(bitIndex: Int): Unit = {
+ val wordIndex = bitIndex >> 6 // divide by 64
+ Arrays.fill(words, 0, wordIndex, 0)
+ if(wordIndex < words.length) {
+ // Clear the remaining bits
+ val mask = -1L << (bitIndex & 0x3f)
+ words(wordIndex) &= mask
+ }
+ }
+
+ /**
* Compute the bit-wise AND of the two sets returning the
* result.
*/
diff --git a/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala
index 69dbfa9cd7..0169c9926e 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala
@@ -152,4 +152,36 @@ class BitSetSuite extends SparkFunSuite {
assert(bitsetDiff.nextSetBit(85) === 85)
assert(bitsetDiff.nextSetBit(86) === -1)
}
+
+ test( "[gs]etUntil" ) {
+ val bitSet = new BitSet(100)
+
+ bitSet.setUntil(bitSet.capacity)
+
+ (0 until bitSet.capacity).foreach { i =>
+ assert(bitSet.get(i))
+ }
+
+ bitSet.clearUntil(bitSet.capacity)
+
+ (0 until bitSet.capacity).foreach { i =>
+ assert(!bitSet.get(i))
+ }
+
+ val setUntil = bitSet.capacity / 2
+ bitSet.setUntil(setUntil)
+
+ val clearUntil = setUntil / 2
+ bitSet.clearUntil(clearUntil)
+
+ (0 until clearUntil).foreach { i =>
+ assert(!bitSet.get(i))
+ }
+ (clearUntil until setUntil).foreach { i =>
+ assert(bitSet.get(i))
+ }
+ (setUntil until bitSet.capacity).foreach { i =>
+ assert(!bitSet.get(i))
+ }
+ }
}