From 06303a62e5dad330687406bdd4e26a2a9aa24ac2 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 8 Aug 2013 18:50:00 -0700 Subject: Optimize JavaPageRank to use reduceByKey instead of groupByKey --- examples/src/main/java/spark/examples/JavaPageRank.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'examples') diff --git a/examples/src/main/java/spark/examples/JavaPageRank.java b/examples/src/main/java/spark/examples/JavaPageRank.java index 9d90ef9174..75df1af2e3 100644 --- a/examples/src/main/java/spark/examples/JavaPageRank.java +++ b/examples/src/main/java/spark/examples/JavaPageRank.java @@ -23,6 +23,7 @@ import spark.api.java.JavaRDD; import spark.api.java.JavaSparkContext; import spark.api.java.function.FlatMapFunction; import spark.api.java.function.Function; +import spark.api.java.function.Function2; import spark.api.java.function.PairFlatMapFunction; import spark.api.java.function.PairFunction; @@ -39,12 +40,11 @@ import java.util.ArrayList; * where URL and their neighbors are separated by space(s). */ public class JavaPageRank { - private static double sum(List numbers) { - double out = 0.0; - for (double number : numbers) { - out += number; + private static class Sum extends Function2 { + @Override + public Double call(Double a, Double b) { + return a + b; } - return out; } public static void main(String[] args) throws Exception { @@ -91,16 +91,15 @@ public class JavaPageRank { for (String n : s._1) { results.add(new Tuple2(n, s._2 / s._1.size())); } - return results; } }); // Re-calculates URL ranks based on neighbor contributions. - ranks = contribs.groupByKey().mapValues(new Function, Double>() { + ranks = contribs.reduceByKey(new Sum()).mapValues(new Function() { @Override - public Double call(List cs) throws Exception { - return 0.15 + sum(cs) * 0.85; + public Double call(Double sum) throws Exception { + return 0.15 + sum * 0.85; } }); } -- cgit v1.2.3