aboutsummaryrefslogtreecommitdiff
path: root/core/src
diff options
context:
space:
mode:
authorMark Hamstra <markhamstra@gmail.com>2013-03-14 12:59:58 -0700
committerMark Hamstra <markhamstra@gmail.com>2013-03-14 12:59:58 -0700
commitb1422cbdd59569261c9df84034a5a03833ec3996 (patch)
treeee9ded9861ad827b29173fc37220e76e7262aecd /core/src
parent4032beba4948fc931190e2f16816545c9d0a1930 (diff)
downloadspark-b1422cbdd59569261c9df84034a5a03833ec3996.tar.gz
spark-b1422cbdd59569261c9df84034a5a03833ec3996.tar.bz2
spark-b1422cbdd59569261c9df84034a5a03833ec3996.zip
added foldByKey
Diffstat (limited to 'core/src')
-rw-r--r--core/src/main/scala/spark/PairRDDFunctions.scala7
1 files changed, 7 insertions, 0 deletions
diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala
index e7408e4352..a6e00c3a84 100644
--- a/core/src/main/scala/spark/PairRDDFunctions.scala
+++ b/core/src/main/scala/spark/PairRDDFunctions.scala
@@ -89,6 +89,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](
}
/**
+ * Merge the values for each key using an associative function and a neutral "zero value".
+ */
+ def foldByKey[V1 >: V](zeroValue: V1)(op: (V1, V1) => V1): RDD[(K, V1)] = {
+ groupByKey.mapValues(seq => seq.fold(zeroValue)(op))
+ }
+
+ /**
* Merge the values for each key using an associative reduce function. This will also perform
* the merging locally on each mapper before sending results to a reducer, similarly to a
* "combiner" in MapReduce.