aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorJosh Rosen <rosenville@gmail.com>2012-08-10 01:10:02 -0700
committerJosh Rosen <rosenville@gmail.com>2012-08-18 22:33:51 -0700
commit886b39de557b4d5f54f5ca11559fca9799534280 (patch)
treeff4504773f3f75b2408f5acbc1a9e0e0b3b3ff64 /python
parent9a0c128feceb63685513ce9c1022ef2d4de43fbf (diff)
downloadspark-886b39de557b4d5f54f5ca11559fca9799534280.tar.gz
spark-886b39de557b4d5f54f5ca11559fca9799534280.tar.bz2
spark-886b39de557b4d5f54f5ca11559fca9799534280.zip
Add Python API.
Diffstat (limited to 'python')
-rw-r--r--python/tc.py22
1 files changed, 22 insertions, 0 deletions
diff --git a/python/tc.py b/python/tc.py
new file mode 100644
index 0000000000..5dcc4317e0
--- /dev/null
+++ b/python/tc.py
@@ -0,0 +1,22 @@
+from rdd import SparkContext
+
+sc = SparkContext("local", "PythonWordCount")
+e = [(1, 2), (2, 3), (4, 1)]
+
+tc = sc.parallelizePairs(e)
+
+edges = tc.mapPairs(lambda (x, y): (y, x))
+
+oldCount = 0
+nextCount = tc.count()
+
+def project(x):
+ return (x[1][1], x[1][0])
+
+while nextCount != oldCount:
+ oldCount = nextCount
+ tc = tc.union(tc.join(edges).mapPairs(project)).distinct()
+ nextCount = tc.count()
+
+print "TC has %i edges" % tc.count()
+print tc.collect()