aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/tests.py
diff options
context:
space:
mode:
authorDavies Liu <davies.liu@gmail.com>2014-10-23 17:20:00 -0700
committerJosh Rosen <joshrosen@databricks.com>2014-10-23 17:20:00 -0700
commite595c8d08a20a122295af62d5e9cc4116f9727f6 (patch)
treeec0226aecad30372b9ece27e534f4482c24c94bf /python/pyspark/tests.py
parent83b7a1c6503adce1826fc537b4db47e534da5cae (diff)
downloadspark-e595c8d08a20a122295af62d5e9cc4116f9727f6.tar.gz
spark-e595c8d08a20a122295af62d5e9cc4116f9727f6.tar.bz2
spark-e595c8d08a20a122295af62d5e9cc4116f9727f6.zip
[SPARK-3993] [PySpark] fix bug while reuse worker after take()
After take(), some garbage may be left in the socket, and the next task assigned to this worker will hang because of the corrupted data. We should make sure the socket is clean before reusing it: write END_OF_STREAM at the end, and check it after reading out all results from Python. Author: Davies Liu <davies.liu@gmail.com> Author: Davies Liu <davies@databricks.com> Closes #2838 from davies/fix_reuse and squashes the following commits: 8872914 [Davies Liu] fix tests 660875b [Davies Liu] fix bug while reuse worker after take()
Diffstat (limited to 'python/pyspark/tests.py')
-rw-r--r--python/pyspark/tests.py19
1 file changed, 18 insertions, 1 deletion
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 1a8e4150e6..7a2107ec32 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -31,7 +31,7 @@ import tempfile
import time
import zipfile
import random
-from platform import python_implementation
+import threading
if sys.version_info[:2] <= (2, 6):
try:
@@ -1380,6 +1380,23 @@ class WorkerTests(PySparkTestCase):
self.assertEqual(sum(range(100)), acc2.value)
self.assertEqual(sum(range(100)), acc1.value)
+ def test_reuse_worker_after_take(self):
+ rdd = self.sc.parallelize(range(100000), 1)
+ self.assertEqual(0, rdd.first())
+
+ def count():
+ try:
+ rdd.count()
+ except Exception:
+ pass
+
+ t = threading.Thread(target=count)
+ t.daemon = True
+ t.start()
+ t.join(5)
+ self.assertTrue(not t.isAlive())
+ self.assertEqual(100000, rdd.count())
+
class SparkSubmitTests(unittest.TestCase):