about | summary | refs | log | tree | commit | diff
path: root/examples/src/main/python/pagerank.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/pagerank.py')
-rwxr-xr-x examples/src/main/python/pagerank.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/examples/src/main/python/pagerank.py b/examples/src/main/python/pagerank.py
index cd774cf3a3..d350fa46fa 100755
--- a/examples/src/main/python/pagerank.py
+++ b/examples/src/main/python/pagerank.py
@@ -36,19 +36,19 @@ def parseNeighbors(urls):
if __name__ == "__main__":
- if len(sys.argv) < 3:
- print >> sys.stderr, "Usage: pagerank <master> <file> <number_of_iterations>"
+ if len(sys.argv) != 3:
+ print >> sys.stderr, "Usage: pagerank <file> <iterations>"
exit(-1)
# Initialize the spark context.
- sc = SparkContext(sys.argv[1], "PythonPageRank")
+ sc = SparkContext(appName="PythonPageRank")
# Loads in input file. It should be in format of:
# URL neighbor URL
# URL neighbor URL
# URL neighbor URL
# ...
- lines = sc.textFile(sys.argv[2], 1)
+ lines = sc.textFile(sys.argv[1], 1)
# Loads all URLs from input file and initialize their neighbors.
links = lines.map(lambda urls: parseNeighbors(urls)).distinct().groupByKey().cache()
@@ -57,7 +57,7 @@ if __name__ == "__main__":
ranks = links.map(lambda (url, neighbors): (url, 1.0))
# Calculates and updates URL ranks continuously using PageRank algorithm.
- for iteration in xrange(int(sys.argv[3])):
+ for iteration in xrange(int(sys.argv[2])):
# Calculates URL contributions to the rank of other URLs.
contribs = links.join(ranks).flatMap(lambda (url, (urls, rank)):
computeContribs(urls, rank))