diff options
Diffstat (limited to 'python/pyspark/tests.py')
-rw-r--r-- | python/pyspark/tests.py | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 32645778c2..bca52a7ce6 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -533,6 +533,15 @@ class RDDTests(ReusedPySparkTestCase):
         a = a._reserialize(BatchedSerializer(PickleSerializer(), 2))
         b = b._reserialize(MarshalSerializer())
         self.assertEqual(a.zip(b).collect(), [(0, 100), (1, 101), (2, 102), (3, 103), (4, 104)])
+        # regression test for SPARK-4841
+        path = os.path.join(SPARK_HOME, "python/test_support/hello.txt")
+        t = self.sc.textFile(path)
+        cnt = t.count()
+        self.assertEqual(cnt, t.zip(t).count())
+        rdd = t.map(str)
+        self.assertEqual(cnt, t.zip(rdd).count())
+        # regression test for bug in _reserializer()
+        self.assertEqual(cnt, t.zip(rdd).count())
 
     def test_zip_with_different_number_of_items(self):
         a = self.sc.parallelize(range(5), 2)