about summary refs log tree commit diff
path: root/examples/src/main/python/pagerank.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/pagerank.py')
-rwxr-xr-x examples/src/main/python/pagerank.py | 16
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/examples/src/main/python/pagerank.py b/examples/src/main/python/pagerank.py
index a5f25d78c1..2fdc9773d4 100755
--- a/examples/src/main/python/pagerank.py
+++ b/examples/src/main/python/pagerank.py
@@ -19,6 +19,7 @@
This is an example implementation of PageRank. For more conventional use,
Please refer to PageRank implementation provided by graphx
"""
+from __future__ import print_function
import re
import sys
@@ -42,11 +43,12 @@ def parseNeighbors(urls):
if __name__ == "__main__":
if len(sys.argv) != 3:
- print >> sys.stderr, "Usage: pagerank <file> <iterations>"
+ print("Usage: pagerank <file> <iterations>", file=sys.stderr)
exit(-1)
- print >> sys.stderr, """WARN: This is a naive implementation of PageRank and is
- given as an example! Please refer to PageRank implementation provided by graphx"""
+ print("""WARN: This is a naive implementation of PageRank and is
+ given as an example! Please refer to PageRank implementation provided by graphx""",
+ file=sys.stderr)
# Initialize the spark context.
sc = SparkContext(appName="PythonPageRank")
@@ -62,19 +64,19 @@ if __name__ == "__main__":
links = lines.map(lambda urls: parseNeighbors(urls)).distinct().groupByKey().cache()
# Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
- ranks = links.map(lambda (url, neighbors): (url, 1.0))
+ ranks = links.map(lambda url_neighbors: (url_neighbors[0], 1.0))
# Calculates and updates URL ranks continuously using PageRank algorithm.
- for iteration in xrange(int(sys.argv[2])):
+ for iteration in range(int(sys.argv[2])):
# Calculates URL contributions to the rank of other URLs.
contribs = links.join(ranks).flatMap(
- lambda (url, (urls, rank)): computeContribs(urls, rank))
+ lambda url_urls_rank: computeContribs(url_urls_rank[1][0], url_urls_rank[1][1]))
# Re-calculates URL ranks based on neighbor contributions.
ranks = contribs.reduceByKey(add).mapValues(lambda rank: rank * 0.85 + 0.15)
# Collects all URL ranks and dump them to console.
for (link, rank) in ranks.collect():
- print "%s has rank: %s." % (link, rank)
+ print("%s has rank: %s." % (link, rank))
sc.stop()