author     Andrew Or <andrewor14@gmail.com>        2014-05-16 22:36:23 -0700
committer  Patrick Wendell <pwendell@gmail.com>    2014-05-16 22:36:23 -0700
commit     cf6cbe9f76c3b322a968c836d039fc5b70d4ce43 (patch)
tree       7f1269166db1364d6f9393bd65d830a9948ce884 /examples
parent     4b8ec6fcfd7a7ef0857d5b21917183c181301c95 (diff)
[SPARK-1824] Remove <master> from Python examples
A recent PR (#552) fixed this for all Scala / Java examples. We need to do it for Python too. Note that this blocks on #799, which makes `bin/pyspark` go through Spark submit. With only the changes in this PR, the only way to run these examples is through Spark submit. Once #799 goes in, you can use `bin/pyspark` to run them too. For example:

```
bin/pyspark examples/src/main/python/pi.py 100 --master local-cluster[4,1,512]
```

Author: Andrew Or <andrewor14@gmail.com>

Closes #802 from andrewor14/python-examples and squashes the following commits:

cf50b9f [Andrew Or] De-indent python comments (minor)
50f80b1 [Andrew Or] Remove pyFiles from SparkContext construction
c362f69 [Andrew Or] Update docs to use spark-submit for python applications
7072c6a [Andrew Or] Merge branch 'master' of github.com:apache/spark into python-examples
427a5f0 [Andrew Or] Update docs
d32072c [Andrew Or] Remove <master> from examples + update usages
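With `<master>` removed from the scripts themselves, the master URL is now supplied by the launcher rather than as a positional argument. A minimal sketch of running one of these examples through `bin/spark-submit` (the `local[4]` master and the slice count are illustrative, not taken from this patch):

```
# Run the Pi example; "10" is the optional slices argument, the master comes from --master.
bin/spark-submit --master local[4] examples/src/main/python/pi.py 10
```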
Diffstat (limited to 'examples')
-rwxr-xr-x  examples/src/main/python/als.py                         18
-rwxr-xr-x  examples/src/main/python/kmeans.py                      12
-rwxr-xr-x  examples/src/main/python/logistic_regression.py         10
-rwxr-xr-x  examples/src/main/python/mllib/kmeans.py                10
-rwxr-xr-x  examples/src/main/python/mllib/logistic_regression.py   10
-rwxr-xr-x  examples/src/main/python/pagerank.py                    10
-rwxr-xr-x  examples/src/main/python/pi.py                          10
-rwxr-xr-x  examples/src/main/python/sort.py                         8
-rwxr-xr-x  examples/src/main/python/transitive_closure.py          10
-rwxr-xr-x  examples/src/main/python/wordcount.py                    8
10 files changed, 53 insertions, 53 deletions
diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py
index 01552dc1d4..f0b46cd28b 100755
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -46,15 +46,15 @@ def update(i, vec, mat, ratings):
return np.linalg.solve(XtX, Xty)
if __name__ == "__main__":
- if len(sys.argv) < 2:
- print >> sys.stderr, "Usage: als <master> <M> <U> <F> <iters> <slices>"
- exit(-1)
- sc = SparkContext(sys.argv[1], "PythonALS", pyFiles=[realpath(__file__)])
- M = int(sys.argv[2]) if len(sys.argv) > 2 else 100
- U = int(sys.argv[3]) if len(sys.argv) > 3 else 500
- F = int(sys.argv[4]) if len(sys.argv) > 4 else 10
- ITERATIONS = int(sys.argv[5]) if len(sys.argv) > 5 else 5
- slices = int(sys.argv[6]) if len(sys.argv) > 6 else 2
+ """
+ Usage: als [M] [U] [F] [iterations] [slices]
+ """
+ sc = SparkContext(appName="PythonALS")
+ M = int(sys.argv[1]) if len(sys.argv) > 1 else 100
+ U = int(sys.argv[2]) if len(sys.argv) > 2 else 500
+ F = int(sys.argv[3]) if len(sys.argv) > 3 else 10
+ ITERATIONS = int(sys.argv[4]) if len(sys.argv) > 4 else 5
+ slices = int(sys.argv[5]) if len(sys.argv) > 5 else 2
print "Running ALS with M=%d, U=%d, F=%d, iters=%d, slices=%d\n" % \
(M, U, F, ITERATIONS, slices)
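After this change every argument to als.py is optional and falls back to a default, so partial argument lists work. A hedged sketch of invoking the updated script through spark-submit, overriding only M and U (the master URL and values are illustrative):

```
# M=200, U=1000; F, iterations, and slices fall back to their defaults (10, 5, 2).
bin/spark-submit --master local[2] examples/src/main/python/als.py 200 1000
```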
diff --git a/examples/src/main/python/kmeans.py b/examples/src/main/python/kmeans.py
index e3596488fa..fc16586c28 100755
--- a/examples/src/main/python/kmeans.py
+++ b/examples/src/main/python/kmeans.py
@@ -45,14 +45,14 @@ def closestPoint(p, centers):
if __name__ == "__main__":
- if len(sys.argv) < 5:
- print >> sys.stderr, "Usage: kmeans <master> <file> <k> <convergeDist>"
+ if len(sys.argv) != 4:
+ print >> sys.stderr, "Usage: kmeans <file> <k> <convergeDist>"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonKMeans")
- lines = sc.textFile(sys.argv[2])
+ sc = SparkContext(appName="PythonKMeans")
+ lines = sc.textFile(sys.argv[1])
data = lines.map(parseVector).cache()
- K = int(sys.argv[3])
- convergeDist = float(sys.argv[4])
+ K = int(sys.argv[2])
+ convergeDist = float(sys.argv[3])
kPoints = data.takeSample(False, K, 1)
tempDist = 1.0
diff --git a/examples/src/main/python/logistic_regression.py b/examples/src/main/python/logistic_regression.py
index fe5373cf79..0f22d0b323 100755
--- a/examples/src/main/python/logistic_regression.py
+++ b/examples/src/main/python/logistic_regression.py
@@ -47,12 +47,12 @@ def readPointBatch(iterator):
return [matrix]
if __name__ == "__main__":
- if len(sys.argv) != 4:
- print >> sys.stderr, "Usage: logistic_regression <master> <file> <iters>"
+ if len(sys.argv) != 3:
+ print >> sys.stderr, "Usage: logistic_regression <file> <iterations>"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonLR", pyFiles=[realpath(__file__)])
- points = sc.textFile(sys.argv[2]).mapPartitions(readPointBatch).cache()
- iterations = int(sys.argv[3])
+ sc = SparkContext(appName="PythonLR")
+ points = sc.textFile(sys.argv[1]).mapPartitions(readPointBatch).cache()
+ iterations = int(sys.argv[2])
# Initialize w to a random value
w = 2 * np.random.ranf(size=D) - 1
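Since `pyFiles=[realpath(__file__)]` is no longer passed to the SparkContext constructor, extra Python dependencies would now be shipped through spark-submit's `--py-files` flag instead. A hedged sketch; the helper module and data file names below are hypothetical:

```
# Ship a hypothetical helper module alongside the example; the input file and
# iteration count are illustrative arguments for logistic_regression.py.
bin/spark-submit --master local[2] --py-files my_helpers.py \
  examples/src/main/python/logistic_regression.py lr_data.txt 10
```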
diff --git a/examples/src/main/python/mllib/kmeans.py b/examples/src/main/python/mllib/kmeans.py
index dec82ff34f..b308132c9a 100755
--- a/examples/src/main/python/mllib/kmeans.py
+++ b/examples/src/main/python/mllib/kmeans.py
@@ -33,12 +33,12 @@ def parseVector(line):
if __name__ == "__main__":
- if len(sys.argv) < 4:
- print >> sys.stderr, "Usage: kmeans <master> <file> <k>"
+ if len(sys.argv) != 3:
+ print >> sys.stderr, "Usage: kmeans <file> <k>"
exit(-1)
- sc = SparkContext(sys.argv[1], "KMeans")
- lines = sc.textFile(sys.argv[2])
+ sc = SparkContext(appName="KMeans")
+ lines = sc.textFile(sys.argv[1])
data = lines.map(parseVector)
- k = int(sys.argv[3])
+ k = int(sys.argv[2])
model = KMeans.train(data, k)
print "Final centers: " + str(model.clusterCenters)
diff --git a/examples/src/main/python/mllib/logistic_regression.py b/examples/src/main/python/mllib/logistic_regression.py
index 8631051d00..6e0f7a4ee5 100755
--- a/examples/src/main/python/mllib/logistic_regression.py
+++ b/examples/src/main/python/mllib/logistic_regression.py
@@ -39,12 +39,12 @@ def parsePoint(line):
if __name__ == "__main__":
- if len(sys.argv) != 4:
- print >> sys.stderr, "Usage: logistic_regression <master> <file> <iters>"
+ if len(sys.argv) != 3:
+ print >> sys.stderr, "Usage: logistic_regression <file> <iterations>"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonLR")
- points = sc.textFile(sys.argv[2]).map(parsePoint)
- iterations = int(sys.argv[3])
+ sc = SparkContext(appName="PythonLR")
+ points = sc.textFile(sys.argv[1]).map(parsePoint)
+ iterations = int(sys.argv[2])
model = LogisticRegressionWithSGD.train(points, iterations)
print "Final weights: " + str(model.weights)
print "Final intercept: " + str(model.intercept)
diff --git a/examples/src/main/python/pagerank.py b/examples/src/main/python/pagerank.py
index cd774cf3a3..d350fa46fa 100755
--- a/examples/src/main/python/pagerank.py
+++ b/examples/src/main/python/pagerank.py
@@ -36,19 +36,19 @@ def parseNeighbors(urls):
if __name__ == "__main__":
- if len(sys.argv) < 3:
- print >> sys.stderr, "Usage: pagerank <master> <file> <number_of_iterations>"
+ if len(sys.argv) != 3:
+ print >> sys.stderr, "Usage: pagerank <file> <iterations>"
exit(-1)
# Initialize the spark context.
- sc = SparkContext(sys.argv[1], "PythonPageRank")
+ sc = SparkContext(appName="PythonPageRank")
# Loads in input file. It should be in format of:
# URL neighbor URL
# URL neighbor URL
# URL neighbor URL
# ...
- lines = sc.textFile(sys.argv[2], 1)
+ lines = sc.textFile(sys.argv[1], 1)
# Loads all URLs from input file and initialize their neighbors.
links = lines.map(lambda urls: parseNeighbors(urls)).distinct().groupByKey().cache()
@@ -57,7 +57,7 @@ if __name__ == "__main__":
ranks = links.map(lambda (url, neighbors): (url, 1.0))
# Calculates and updates URL ranks continuously using PageRank algorithm.
- for iteration in xrange(int(sys.argv[3])):
+ for iteration in xrange(int(sys.argv[2])):
# Calculates URL contributions to the rank of other URLs.
contribs = links.join(ranks).flatMap(lambda (url, (urls, rank)):
computeContribs(urls, rank))
diff --git a/examples/src/main/python/pi.py b/examples/src/main/python/pi.py
index ab0645fc2f..234720b55f 100755
--- a/examples/src/main/python/pi.py
+++ b/examples/src/main/python/pi.py
@@ -23,11 +23,11 @@ from pyspark import SparkContext
if __name__ == "__main__":
- if len(sys.argv) == 1:
- print >> sys.stderr, "Usage: pi <master> [<slices>]"
- exit(-1)
- sc = SparkContext(sys.argv[1], "PythonPi")
- slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
+ """
+ Usage: pi [slices]
+ """
+ sc = SparkContext(appName="PythonPi")
+ slices = int(sys.argv[1]) if len(sys.argv) > 1 else 2
n = 100000 * slices
def f(_):
x = random() * 2 - 1
diff --git a/examples/src/main/python/sort.py b/examples/src/main/python/sort.py
index 5de20a6d98..4913ee926a 100755
--- a/examples/src/main/python/sort.py
+++ b/examples/src/main/python/sort.py
@@ -21,11 +21,11 @@ from pyspark import SparkContext
if __name__ == "__main__":
- if len(sys.argv) < 3:
- print >> sys.stderr, "Usage: sort <master> <file>"
+ if len(sys.argv) != 2:
+ print >> sys.stderr, "Usage: sort <file>"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonSort")
- lines = sc.textFile(sys.argv[2], 1)
+ sc = SparkContext(appName="PythonSort")
+ lines = sc.textFile(sys.argv[1], 1)
sortedCount = lines.flatMap(lambda x: x.split(' ')) \
.map(lambda x: (int(x), 1)) \
.sortByKey(lambda x: x)
diff --git a/examples/src/main/python/transitive_closure.py b/examples/src/main/python/transitive_closure.py
index 744cce6651..8698369b13 100755
--- a/examples/src/main/python/transitive_closure.py
+++ b/examples/src/main/python/transitive_closure.py
@@ -36,11 +36,11 @@ def generateGraph():
if __name__ == "__main__":
- if len(sys.argv) == 1:
- print >> sys.stderr, "Usage: transitive_closure <master> [<slices>]"
- exit(-1)
- sc = SparkContext(sys.argv[1], "PythonTransitiveClosure")
- slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
+ """
+ Usage: transitive_closure [slices]
+ """
+ sc = SparkContext(appName="PythonTransitiveClosure")
+ slices = int(sys.argv[1]) if len(sys.argv) > 1 else 2
tc = sc.parallelize(generateGraph(), slices).cache()
# Linear transitive closure: each round grows paths by one edge,
diff --git a/examples/src/main/python/wordcount.py b/examples/src/main/python/wordcount.py
index b9139b9d76..dcc095fdd0 100755
--- a/examples/src/main/python/wordcount.py
+++ b/examples/src/main/python/wordcount.py
@@ -22,11 +22,11 @@ from pyspark import SparkContext
if __name__ == "__main__":
- if len(sys.argv) < 3:
- print >> sys.stderr, "Usage: wordcount <master> <file>"
+ if len(sys.argv) != 2:
+ print >> sys.stderr, "Usage: wordcount <file>"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonWordCount")
- lines = sc.textFile(sys.argv[2], 1)
+ sc = SparkContext(appName="PythonWordCount")
+ lines = sc.textFile(sys.argv[1], 1)
counts = lines.flatMap(lambda x: x.split(' ')) \
.map(lambda x: (x, 1)) \
.reduceByKey(add)