diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2012-06-09 14:44:18 -0700 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2012-06-09 14:44:18 -0700 |
commit | a96558caa3c0feb20bbf0f3ec367673886fc78c6 (patch) | |
tree | 186307c3756a244a2a1b2df6a8140947eb967ca8 /bagel/src | |
parent | 048276799ae15ce5978733722e8ddde6a07302ff (diff) | |
download | spark-a96558caa3c0feb20bbf0f3ec367673886fc78c6.tar.gz spark-a96558caa3c0feb20bbf0f3ec367673886fc78c6.tar.bz2 spark-a96558caa3c0feb20bbf0f3ec367673886fc78c6.zip |
Performance improvements to shuffle operations: in particular, preserve
RDD partitioning in more cases where it's possible, and use iterators
instead of materializing collections when doing joins.
Diffstat (limited to 'bagel/src')
-rw-r--r-- | bagel/src/main/scala/spark/bagel/Bagel.scala | 3 | ||||
-rw-r--r-- | bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala | 1 |
2 files changed, 1 insertions, 3 deletions
diff --git a/bagel/src/main/scala/spark/bagel/Bagel.scala b/bagel/src/main/scala/spark/bagel/Bagel.scala index 2f57c9c0fd..996ca2a877 100644 --- a/bagel/src/main/scala/spark/bagel/Bagel.scala +++ b/bagel/src/main/scala/spark/bagel/Bagel.scala @@ -30,8 +30,7 @@ object Bagel extends Logging { val aggregated = agg(verts, aggregator) val combinedMsgs = msgs.combineByKey( - combiner.createCombiner, combiner.mergeMsg, combiner.mergeCombiners, - splits, partitioner) + combiner.createCombiner _, combiner.mergeMsg _, combiner.mergeCombiners _, partitioner) val grouped = combinedMsgs.groupWith(verts) val (processed, numMsgs, numActiveVerts) = comp[K, V, M, C](sc, grouped, compute(_, _, aggregated, superstep)) diff --git a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala b/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala index 7084ff97d9..8ce7abd03f 100644 --- a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala +++ b/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRankStandalone.scala @@ -105,7 +105,6 @@ object WikipediaPageRankStandalone { ranks = (contribs.combineByKey((x: Double) => x, (x: Double, y: Double) => x + y, (x: Double, y: Double) => x + y, - numSplits, partitioner) .mapValues(sum => a/n + (1-a)*sum)) } |