diff options
author | Josh Rosen <rosenville@gmail.com> | 2012-08-10 01:10:02 -0700 |
---|---|---|
committer | Josh Rosen <rosenville@gmail.com> | 2012-08-18 22:33:51 -0700 |
commit | 886b39de557b4d5f54f5ca11559fca9799534280 (patch) | |
tree | ff4504773f3f75b2408f5acbc1a9e0e0b3b3ff64 /python | |
parent | 9a0c128feceb63685513ce9c1022ef2d4de43fbf (diff) | |
download | spark-886b39de557b4d5f54f5ca11559fca9799534280.tar.gz spark-886b39de557b4d5f54f5ca11559fca9799534280.tar.bz2 spark-886b39de557b4d5f54f5ca11559fca9799534280.zip |
Add Python API.
Diffstat (limited to 'python')
-rw-r--r-- | python/tc.py | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/python/tc.py b/python/tc.py new file mode 100644 index 0000000000..5dcc4317e0 --- /dev/null +++ b/python/tc.py @@ -0,0 +1,22 @@ +from rdd import SparkContext + +sc = SparkContext("local", "PythonWordCount") +e = [(1, 2), (2, 3), (4, 1)] + +tc = sc.parallelizePairs(e) + +edges = tc.mapPairs(lambda (x, y): (y, x)) + +oldCount = 0 +nextCount = tc.count() + +def project(x): + return (x[1][1], x[1][0]) + +while nextCount != oldCount: + oldCount = nextCount + tc = tc.union(tc.join(edges).mapPairs(project)).distinct() + nextCount = tc.count() + +print "TC has %i edges" % tc.count() +print tc.collect() |