aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author: Reynold Xin <rxin@apache.org> 2014-01-25 22:41:30 -0800
committer: Reynold Xin <rxin@apache.org> 2014-01-25 22:41:30 -0800
commit: c40619d4873f36ffb96a2e6292b32d5b64eab153 (patch)
tree: 08cf105683c13bac4a4c5a09290ffe026880ac21 /python
parent: c66a2ef1c2dc9c218069b3ce8c39a49e5b92fc16 (diff)
parent: f83068497ba42c5ea5c636efebca81f684e96177 (diff)
download: spark-c40619d4873f36ffb96a2e6292b32d5b64eab153.tar.gz
spark-c40619d4873f36ffb96a2e6292b32d5b64eab153.tar.bz2
spark-c40619d4873f36ffb96a2e6292b32d5b64eab153.zip
Merge pull request #504 from JoshRosen/SPARK-1025
Fix PySpark hang when input files are deleted (SPARK-1025). This pull request addresses [SPARK-1025](https://spark-project.atlassian.net/browse/SPARK-1025), an issue where PySpark could hang if its input files were deleted.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/tests.py 11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index acd1ca5676..527104587f 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -168,6 +168,17 @@ class TestRDDFunctions(PySparkTestCase):
self.assertEqual("Hello World!", x.strip())
self.assertEqual("Hello World!", y.strip())
+ def test_deleting_input_files(self):
+ # Regression test for SPARK-1025: counting an RDD after its input file has been deleted must raise an exception rather than hang.
+ tempFile = NamedTemporaryFile(delete=False)  # delete=False: the file stays on disk after close() so it can be reopened by name
+ tempFile.write("Hello World!")
+ tempFile.close()
+ data = self.sc.textFile(tempFile.name)
+ filtered_data = data.filter(lambda x: True)  # predicate is always True, so no element is dropped
+ self.assertEqual(1, filtered_data.count())  # baseline: count is 1 while the input file still exists
+ os.unlink(tempFile.name)  # delete the input file before counting again
+ self.assertRaises(Exception, lambda: filtered_data.count())  # the second count() is expected to raise
+
class TestIO(PySparkTestCase):