aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
author: Matei Zaharia <matei@eecs.berkeley.edu> 2013-07-28 23:50:38 -0400
committer: Matei Zaharia <matei@eecs.berkeley.edu> 2013-07-29 02:51:43 -0400
commit: d75c3086951f603ec30b2527c24559e053ed7f25 (patch)
tree: 9ed6719befd62705f34c67557e51ce2a2d9bf1e8 /python/pyspark
parent: 96b50e82dc0db501ab94748a23ae3237f1d82034 (diff)
download: spark-d75c3086951f603ec30b2527c24559e053ed7f25.tar.gz
spark-d75c3086951f603ec30b2527c24559e053ed7f25.tar.bz2
spark-d75c3086951f603ec30b2527c24559e053ed7f25.zip
Use None instead of empty string as it's slightly smaller/faster
Diffstat (limited to 'python/pyspark')
-rw-r--r-- python/pyspark/rdd.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 4aafe35d13..8734cacb0b 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -160,7 +160,7 @@ class RDD(object):
>>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect())
[1, 2, 3]
"""
- return self.map(lambda x: (x, "")) \
+ return self.map(lambda x: (x, None)) \
.reduceByKey(lambda x, _: x) \
.map(lambda (x, _): x)