aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/context.py6
-rw-r--r--python/pyspark/rdd.py2
-rw-r--r--python/pyspark/tests.py8
3 files changed, 15 insertions, 1 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 44d90f1437..90b2fffbb9 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -324,6 +324,12 @@ class SparkContext(object):
with SparkContext._lock:
SparkContext._active_spark_context = None
+ def emptyRDD(self):
+ """
+ Create an RDD that has no partitions or elements.
+ """
+ return RDD(self._jsc.emptyRDD(), self, NoOpSerializer())
+
def range(self, start, end=None, step=1, numSlices=None):
"""
Create a new RDD of int containing elements from `start` to `end`
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 98a8ff8606..20c0bc93f4 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -960,7 +960,7 @@ class RDD(object):
>>> sc.parallelize([1.0, 2.0, 3.0]).sum()
6.0
"""
- return self.mapPartitions(lambda x: [sum(x)]).reduce(operator.add)
+ return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
def count(self):
"""
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index f9fb37f7fc..11b402e6df 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -458,6 +458,14 @@ class RDDTests(ReusedPySparkTestCase):
self.assertEqual(id + 1, id2)
self.assertEqual(id2, rdd2.id())
+ def test_empty_rdd(self):
+ rdd = self.sc.emptyRDD()
+ self.assertTrue(rdd.isEmpty())
+
+ def test_sum(self):
+ self.assertEqual(0, self.sc.emptyRDD().sum())
+ self.assertEqual(6, self.sc.parallelize([1, 2, 3]).sum())
+
def test_save_as_textfile_with_unicode(self):
# Regression test for SPARK-970
x = u"\u00A1Hola, mundo!"