aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatei Zaharia <matei@eecs.berkeley.edu>2011-01-05 22:31:50 -0500
committerMatei Zaharia <matei@eecs.berkeley.edu>2011-01-05 22:31:50 -0500
commit15829df799d5c84ff40d5697225d3d1e1f2b5a6f (patch)
tree2953d54482643367a722a1e3a1a8b0a64165e0b1
parent69845d5648dfa890b0d06d5e171d2460aa2e370d (diff)
downloadspark-15829df799d5c84ff40d5697225d3d1e1f2b5a6f.tar.gz
spark-15829df799d5c84ff40d5697225d3d1e1f2b5a6f.tar.bz2
spark-15829df799d5c84ff40d5697225d3d1e1f2b5a6f.zip
Updated simple skewed group-by to generate multiple keys with each
hashcode so that Mosharaf's current block implementation works
-rw-r--r--src/examples/SimpleSkewedGroupByTest.scala5
1 files changed, 3 insertions, 2 deletions
diff --git a/src/examples/SimpleSkewedGroupByTest.scala b/src/examples/SimpleSkewedGroupByTest.scala
index 857e65ca28..d37efcd566 100644
--- a/src/examples/SimpleSkewedGroupByTest.scala
+++ b/src/examples/SimpleSkewedGroupByTest.scala
@@ -24,12 +24,13 @@ object SimpleSkewedGroupByTest {
for (i <- 0 until numKVPairs) {
val byteArr = new Array[Byte](valSize)
ranGen.nextBytes(byteArr)
+ val offset = ranGen.nextInt(1000) * numReducers
if (ranGen.nextDouble < ratio / (numReducers + ratio - 1)) {
// give ratio times higher chance of generating key 0 (for reducer 0)
- result(i) = (0, byteArr)
+ result(i) = (offset, byteArr)
} else {
// generate a key for one of the other reducers
- val key = 1 + ranGen.nextInt(numReducers-1)
+ val key = 1 + ranGen.nextInt(numReducers-1) + offset
result(i) = (key, byteArr)
}
}