diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-08-08 18:50:00 -0700 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-08-08 18:50:00 -0700 |
commit | 06303a62e5dad330687406bdd4e26a2a9aa24ac2 (patch) | |
tree | cac431a8fa4fc0f76e84e03d287ec014637e1b06 /examples/src | |
parent | 01f20a941edc898ed045b96f6ddce15d16b36010 (diff) | |
download | spark-06303a62e5dad330687406bdd4e26a2a9aa24ac2.tar.gz spark-06303a62e5dad330687406bdd4e26a2a9aa24ac2.tar.bz2 spark-06303a62e5dad330687406bdd4e26a2a9aa24ac2.zip |
Optimize JavaPageRank to use reduceByKey instead of groupByKey
Diffstat (limited to 'examples/src')
-rw-r--r-- | examples/src/main/java/spark/examples/JavaPageRank.java | 17 |
1 files changed, 8 insertions, 9 deletions
diff --git a/examples/src/main/java/spark/examples/JavaPageRank.java b/examples/src/main/java/spark/examples/JavaPageRank.java index 9d90ef9174..75df1af2e3 100644 --- a/examples/src/main/java/spark/examples/JavaPageRank.java +++ b/examples/src/main/java/spark/examples/JavaPageRank.java @@ -23,6 +23,7 @@ import spark.api.java.JavaRDD; import spark.api.java.JavaSparkContext; import spark.api.java.function.FlatMapFunction; import spark.api.java.function.Function; +import spark.api.java.function.Function2; import spark.api.java.function.PairFlatMapFunction; import spark.api.java.function.PairFunction; @@ -39,12 +40,11 @@ import java.util.ArrayList; * where URL and their neighbors are separated by space(s). */ public class JavaPageRank { - private static double sum(List<Double> numbers) { - double out = 0.0; - for (double number : numbers) { - out += number; + private static class Sum extends Function2<Double, Double, Double> { + @Override + public Double call(Double a, Double b) { + return a + b; } - return out; } public static void main(String[] args) throws Exception { @@ -91,16 +91,15 @@ public class JavaPageRank { for (String n : s._1) { results.add(new Tuple2<String, Double>(n, s._2 / s._1.size())); } - return results; } }); // Re-calculates URL ranks based on neighbor contributions. - ranks = contribs.groupByKey().mapValues(new Function<List<Double>, Double>() { + ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() { @Override - public Double call(List<Double> cs) throws Exception { - return 0.15 + sum(cs) * 0.85; + public Double call(Double sum) throws Exception { + return 0.15 + sum * 0.85; } }); } |