diff options
author | Tathagata Das <tathagata.das1565@gmail.com> | 2013-01-15 12:08:51 -0800 |
---|---|---|
committer | Tathagata Das <tathagata.das1565@gmail.com> | 2013-01-15 12:08:51 -0800 |
commit | cd1521cfdb3c9dd2bf8ced8907afbbbf33893804 (patch) | |
tree | 76fce28a2fca3fcfbbc3a7f4c7b0fe82cfc695c7 /python/examples/wordcount.py | |
parent | 1638fcb0dce296da22ffc90127d5148a8fab745e (diff) | |
parent | cb867e9ffb2c5e3d65d50c222fcce3631b94e4dd (diff) | |
download | spark-cd1521cfdb3c9dd2bf8ced8907afbbbf33893804.tar.gz spark-cd1521cfdb3c9dd2bf8ced8907afbbbf33893804.tar.bz2 spark-cd1521cfdb3c9dd2bf8ced8907afbbbf33893804.zip |
Merge branch 'master' into streaming
Conflicts:
core/src/main/scala/spark/rdd/CoGroupedRDD.scala
core/src/main/scala/spark/rdd/FilteredRDD.scala
docs/_layouts/global.html
docs/index.md
run
Diffstat (limited to 'python/examples/wordcount.py')
-rw-r--r-- | python/examples/wordcount.py | 19 |
1 files changed, 19 insertions, 0 deletions
```diff
diff --git a/python/examples/wordcount.py b/python/examples/wordcount.py
new file mode 100644
index 0000000000..857160624b
--- /dev/null
+++ b/python/examples/wordcount.py
@@ -0,0 +1,19 @@
+import sys
+from operator import add
+
+from pyspark import SparkContext
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print >> sys.stderr, \
+            "Usage: PythonWordCount <master> <file>"
+        exit(-1)
+    sc = SparkContext(sys.argv[1], "PythonWordCount")
+    lines = sc.textFile(sys.argv[2], 1)
+    counts = lines.flatMap(lambda x: x.split(' ')) \
+                  .map(lambda x: (x, 1)) \
+                  .reduceByKey(add)
+    output = counts.collect()
+    for (word, count) in output:
+        print "%s : %i" % (word, count)
```