about summary refs log tree commit diff
path: root/pyspark
diff options
context:
space:
mode:
author    Josh Rosen <joshrosen@eecs.berkeley.edu>  2012-08-27 00:13:19 -0700
committer Josh Rosen <joshrosen@eecs.berkeley.edu>  2012-08-27 00:24:47 -0700
commit    414367850982c4f8fc5e63cc94caa422eb736db5 (patch)
tree      e11baba13ea14dc6cb5e70ba05fd3056489b7198 /pyspark
parent    bff6a46359131a8f9bc38b93149b22baa7c711cd (diff)
download  spark-414367850982c4f8fc5e63cc94caa422eb736db5.tar.gz
          spark-414367850982c4f8fc5e63cc94caa422eb736db5.tar.bz2
          spark-414367850982c4f8fc5e63cc94caa422eb736db5.zip
Fix minor bugs in Python API examples.
Diffstat (limited to 'pyspark')
-rw-r--r--  pyspark/pyspark/examples/pi.py  2
-rw-r--r--  pyspark/pyspark/examples/tc.py  8
2 files changed, 5 insertions, 5 deletions
diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/pyspark/examples/pi.py
index fe63d2c952..348bbc5dce 100644
--- a/pyspark/pyspark/examples/pi.py
+++ b/pyspark/pyspark/examples/pi.py
@@ -9,7 +9,7 @@ if __name__ == "__main__":
print >> sys.stderr, \
"Usage: PythonPi <host> [<slices>]"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonKMeans")
+ sc = SparkContext(sys.argv[1], "PythonPi")
slices = int(sys.argv[2]) if len(sys.argv) > 2 else 2
n = 100000 * slices
def f(_):
diff --git a/pyspark/pyspark/examples/tc.py b/pyspark/pyspark/examples/tc.py
index 2796fdc6ad..9630e72b47 100644
--- a/pyspark/pyspark/examples/tc.py
+++ b/pyspark/pyspark/examples/tc.py
@@ -22,9 +22,9 @@ if __name__ == "__main__":
print >> sys.stderr, \
"Usage: PythonTC <host> [<slices>]"
exit(-1)
- sc = SparkContext(sys.argv[1], "PythonKMeans")
+ sc = SparkContext(sys.argv[1], "PythonTC")
slices = sys.argv[2] if len(sys.argv) > 2 else 2
- tc = sc.parallelizePairs(generateGraph(), slices).cache()
+ tc = sc.parallelize(generateGraph(), slices).cache()
# Linear transitive closure: each round grows paths by one edge,
# by joining the graph's edges with the already-discovered paths.
@@ -32,7 +32,7 @@ if __name__ == "__main__":
# the graph to obtain the path (x, z).
# Because join() joins on keys, the edges are stored in reversed order.
- edges = tc.mapPairs(lambda (x, y): (y, x))
+ edges = tc.map(lambda (x, y): (y, x))
oldCount = 0L
nextCount = tc.count()
@@ -40,7 +40,7 @@ if __name__ == "__main__":
oldCount = nextCount
# Perform the join, obtaining an RDD of (y, (z, x)) pairs,
# then project the result to obtain the new (x, z) paths.
- new_edges = tc.join(edges).mapPairs(lambda (_, (a, b)): (b, a))
+ new_edges = tc.join(edges).map(lambda (_, (a, b)): (b, a))
tc = tc.union(new_edges).distinct().cache()
nextCount = tc.count()
if nextCount == oldCount: