diff options
Diffstat (limited to 'examples/src/main/python/pagerank.py')
-rwxr-xr-x | examples/src/main/python/pagerank.py | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/examples/src/main/python/pagerank.py b/examples/src/main/python/pagerank.py index cd774cf3a3..d350fa46fa 100755 --- a/examples/src/main/python/pagerank.py +++ b/examples/src/main/python/pagerank.py @@ -36,19 +36,19 @@ def parseNeighbors(urls): if __name__ == "__main__": - if len(sys.argv) < 3: - print >> sys.stderr, "Usage: pagerank <master> <file> <number_of_iterations>" + if len(sys.argv) != 3: + print >> sys.stderr, "Usage: pagerank <file> <iterations>" exit(-1) # Initialize the spark context. - sc = SparkContext(sys.argv[1], "PythonPageRank") + sc = SparkContext(appName="PythonPageRank") # Loads in input file. It should be in format of: # URL neighbor URL # URL neighbor URL # URL neighbor URL # ... - lines = sc.textFile(sys.argv[2], 1) + lines = sc.textFile(sys.argv[1], 1) # Loads all URLs from input file and initialize their neighbors. links = lines.map(lambda urls: parseNeighbors(urls)).distinct().groupByKey().cache() @@ -57,7 +57,7 @@ if __name__ == "__main__": ranks = links.map(lambda (url, neighbors): (url, 1.0)) # Calculates and updates URL ranks continuously using PageRank algorithm. - for iteration in xrange(int(sys.argv[3])): + for iteration in xrange(int(sys.argv[2])): # Calculates URL contributions to the rank of other URLs. contribs = links.join(ranks).flatMap(lambda (url, (urls, rank)): computeContribs(urls, rank)) |