diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2011-01-05 22:31:50 -0500 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2011-01-05 22:31:50 -0500 |
commit | 15829df799d5c84ff40d5697225d3d1e1f2b5a6f (patch) | |
tree | 2953d54482643367a722a1e3a1a8b0a64165e0b1 | |
parent | 69845d5648dfa890b0d06d5e171d2460aa2e370d (diff) | |
download | spark-15829df799d5c84ff40d5697225d3d1e1f2b5a6f.tar.gz spark-15829df799d5c84ff40d5697225d3d1e1f2b5a6f.tar.bz2 spark-15829df799d5c84ff40d5697225d3d1e1f2b5a6f.zip |
Updated simple skewed group-by to generate multiple keys with each
hashcode so that Mosharaf's current block implementation works
-rw-r--r-- | src/examples/SimpleSkewedGroupByTest.scala | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/src/examples/SimpleSkewedGroupByTest.scala b/src/examples/SimpleSkewedGroupByTest.scala index 857e65ca28..d37efcd566 100644 --- a/src/examples/SimpleSkewedGroupByTest.scala +++ b/src/examples/SimpleSkewedGroupByTest.scala @@ -24,12 +24,13 @@ object SimpleSkewedGroupByTest { for (i <- 0 until numKVPairs) { val byteArr = new Array[Byte](valSize) ranGen.nextBytes(byteArr) + val offset = ranGen.nextInt(1000) * numReducers if (ranGen.nextDouble < ratio / (numReducers + ratio - 1)) { // give ratio times higher chance of generating key 0 (for reducer 0) - result(i) = (0, byteArr) + result(i) = (offset, byteArr) } else { // generate a key for one of the other reducers - val key = 1 + ranGen.nextInt(numReducers-1) + val key = 1 + ranGen.nextInt(numReducers-1) + offset result(i) = (key, byteArr) } } |