diff options
author | Mark Hamstra <markhamstra@gmail.com> | 2013-03-14 12:59:58 -0700 |
---|---|---|
committer | Mark Hamstra <markhamstra@gmail.com> | 2013-03-14 12:59:58 -0700 |
commit | b1422cbdd59569261c9df84034a5a03833ec3996 (patch) | |
tree | ee9ded9861ad827b29173fc37220e76e7262aecd /core/src | |
parent | 4032beba4948fc931190e2f16816545c9d0a1930 (diff) | |
download | spark-b1422cbdd59569261c9df84034a5a03833ec3996.tar.gz spark-b1422cbdd59569261c9df84034a5a03833ec3996.tar.bz2 spark-b1422cbdd59569261c9df84034a5a03833ec3996.zip |
added foldByKey
Diffstat (limited to 'core/src')
-rw-r--r-- | core/src/main/scala/spark/PairRDDFunctions.scala | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index e7408e4352..a6e00c3a84 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -89,6 +89,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } /** + * Merge the values for each key using an associative function and a neutral "zero value". + */ + def foldByKey[V1 >: V](zeroValue: V1)(op: (V1, V1) => V1): RDD[(K, V1)] = { + groupByKey.mapValues(seq => seq.fold(zeroValue)(op)) + } + + /** * Merge the values for each key using an associative reduce function. This will also perform * the merging locally on each mapper before sending results to a reducer, similarly to a * "combiner" in MapReduce. |