aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/tests.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/tests.py')
-rw-r--r--python/pyspark/tests.py17
1 files changed, 17 insertions, 0 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index f255b44359..0b3854347a 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -587,6 +587,14 @@ class TestRDDFunctions(PySparkTestCase):
self.assertEquals(partitions[0], [(0, 5), (0, 8), (2, 6)])
self.assertEquals(partitions[1], [(1, 3), (3, 8), (3, 8)])
+ def test_distinct(self):
+ rdd = self.sc.parallelize((1, 2, 3)*10, 10)
+ self.assertEquals(rdd.getNumPartitions(), 10)
+ self.assertEquals(rdd.distinct().count(), 3)
+ result = rdd.distinct(5)
+ self.assertEquals(result.getNumPartitions(), 5)
+ self.assertEquals(result.count(), 3)
+
class TestSQL(PySparkTestCase):
@@ -636,6 +644,15 @@ class TestSQL(PySparkTestCase):
srdd.count()
srdd.collect()
+ def test_distinct(self):
+ rdd = self.sc.parallelize(['{"a": 1}', '{"b": 2}', '{"c": 3}']*10, 10)
+ srdd = self.sqlCtx.jsonRDD(rdd)
+ self.assertEquals(srdd.getNumPartitions(), 10)
+ self.assertEquals(srdd.distinct().count(), 3)
+ result = srdd.distinct(5)
+ self.assertEquals(result.getNumPartitions(), 5)
+ self.assertEquals(result.count(), 3)
+
class TestIO(PySparkTestCase):