diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/context.py | 6 | ||||
-rw-r--r-- | python/pyspark/rdd.py | 2 | ||||
-rw-r--r-- | python/pyspark/tests.py | 8 |
3 files changed, 15 insertions, 1 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 44d90f1437..90b2fffbb9 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -324,6 +324,12 @@ class SparkContext(object): with SparkContext._lock: SparkContext._active_spark_context = None + def emptyRDD(self): + """ + Create an RDD that has no partitions or elements. + """ + return RDD(self._jsc.emptyRDD(), self, NoOpSerializer()) + def range(self, start, end=None, step=1, numSlices=None): """ Create a new RDD of int containing elements from `start` to `end` diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 98a8ff8606..20c0bc93f4 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -960,7 +960,7 @@ class RDD(object): >>> sc.parallelize([1.0, 2.0, 3.0]).sum() 6.0 """ - return self.mapPartitions(lambda x: [sum(x)]).reduce(operator.add) + return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add) def count(self): """ diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index f9fb37f7fc..11b402e6df 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -458,6 +458,14 @@ class RDDTests(ReusedPySparkTestCase): self.assertEqual(id + 1, id2) self.assertEqual(id2, rdd2.id()) + def test_empty_rdd(self): + rdd = self.sc.emptyRDD() + self.assertTrue(rdd.isEmpty()) + + def test_sum(self): + self.assertEqual(0, self.sc.emptyRDD().sum()) + self.assertEqual(6, self.sc.parallelize([1, 2, 3]).sum()) + def test_save_as_textfile_with_unicode(self): # Regression test for SPARK-970 x = u"\u00A1Hola, mundo!" |