diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-07-28 23:38:56 -0400 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-07-29 02:51:43 -0400 |
commit | b5ec35562210c8e7ca4fea07a0d46cb255988c0d (patch) | |
tree | 5ff793af32f83fb259508355e536f7caa2fbe7c0 /python/pyspark/rdd.py | |
parent | b9d6783f36d527f5082bf13a4ee6fd108e97795c (diff) | |
download | spark-b5ec35562210c8e7ca4fea07a0d46cb255988c0d.tar.gz spark-b5ec35562210c8e7ca4fea07a0d46cb255988c0d.tar.bz2 spark-b5ec35562210c8e7ca4fea07a0d46cb255988c0d.zip |
Optimize Python foreach() to not return as many objects
Diffstat (limited to 'python/pyspark/rdd.py')
-rw-r--r-- | python/pyspark/rdd.py | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 6efa61aa66..4aafe35d13 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -267,7 +267,11 @@ class RDD(object): >>> def f(x): print x >>> sc.parallelize([1, 2, 3, 4, 5]).foreach(f) """ - self.map(f).collect() # Force evaluation + def processPartition(iterator): + for x in iterator: + f(x) + yield None + self.mapPartitions(processPartition).collect() # Force evaluation def collect(self): """ |