aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorMatei Zaharia <matei@eecs.berkeley.edu>2013-08-08 18:50:00 -0700
committerMatei Zaharia <matei@eecs.berkeley.edu>2013-08-08 18:50:00 -0700
commit06303a62e5dad330687406bdd4e26a2a9aa24ac2 (patch)
treecac431a8fa4fc0f76e84e03d287ec014637e1b06 /examples
parent01f20a941edc898ed045b96f6ddce15d16b36010 (diff)
downloadspark-06303a62e5dad330687406bdd4e26a2a9aa24ac2.tar.gz
spark-06303a62e5dad330687406bdd4e26a2a9aa24ac2.tar.bz2
spark-06303a62e5dad330687406bdd4e26a2a9aa24ac2.zip
Optimize JavaPageRank to use reduceByKey instead of groupByKey
Diffstat (limited to 'examples')
-rw-r--r--examples/src/main/java/spark/examples/JavaPageRank.java17
1 files changed, 8 insertions, 9 deletions
diff --git a/examples/src/main/java/spark/examples/JavaPageRank.java b/examples/src/main/java/spark/examples/JavaPageRank.java
index 9d90ef9174..75df1af2e3 100644
--- a/examples/src/main/java/spark/examples/JavaPageRank.java
+++ b/examples/src/main/java/spark/examples/JavaPageRank.java
@@ -23,6 +23,7 @@ import spark.api.java.JavaRDD;
import spark.api.java.JavaSparkContext;
import spark.api.java.function.FlatMapFunction;
import spark.api.java.function.Function;
+import spark.api.java.function.Function2;
import spark.api.java.function.PairFlatMapFunction;
import spark.api.java.function.PairFunction;
@@ -39,12 +40,11 @@ import java.util.ArrayList;
* where URL and their neighbors are separated by space(s).
*/
public class JavaPageRank {
- private static double sum(List<Double> numbers) {
- double out = 0.0;
- for (double number : numbers) {
- out += number;
+ private static class Sum extends Function2<Double, Double, Double> {
+ @Override
+ public Double call(Double a, Double b) {
+ return a + b;
}
- return out;
}
public static void main(String[] args) throws Exception {
@@ -91,16 +91,15 @@ public class JavaPageRank {
for (String n : s._1) {
results.add(new Tuple2<String, Double>(n, s._2 / s._1.size()));
}
-
return results;
}
});
// Re-calculates URL ranks based on neighbor contributions.
- ranks = contribs.groupByKey().mapValues(new Function<List<Double>, Double>() {
+ ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
@Override
- public Double call(List<Double> cs) throws Exception {
- return 0.15 + sum(cs) * 0.85;
+ public Double call(Double sum) throws Exception {
+ return 0.15 + sum * 0.85;
}
});
}