From e41786c77482d3f9e3c01cfd583c8899815c3106 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Sat, 25 Oct 2014 01:20:39 -0700 Subject: [SPARK-4088] [PySpark] Python worker should exit after socket is closed by JVM In case of take() or exception in Python, python worker may exit before JVM read() all the response, then the write thread may raise "Connection reset" exception. Python should always wait JVM to close the socket first. cc JoshRosen This is a warm fix, or the tests will be flaky, sorry for that. Author: Davies Liu Closes #2941 from davies/fix_exit and squashes the following commits: 9d4d21e [Davies Liu] fix race --- python/pyspark/daemon.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'python') diff --git a/python/pyspark/daemon.py b/python/pyspark/daemon.py index dbb34775d9..f09587f211 100644 --- a/python/pyspark/daemon.py +++ b/python/pyspark/daemon.py @@ -62,8 +62,7 @@ def worker(sock): exit_code = compute_real_exit_code(exc.code) finally: outfile.flush() - if exit_code: - os._exit(exit_code) + return exit_code # Cleanup zombie children @@ -160,10 +159,13 @@ def manager(): outfile.flush() outfile.close() while True: - worker(sock) - if not reuse: + code = worker(sock) + if not reuse or code: # wait for closing - while sock.recv(1024): + try: + while sock.recv(1024): + pass + except Exception: pass break gc.collect() -- cgit v1.2.3