diff options
author | Andrew Or <andrewor14@gmail.com> | 2014-08-26 23:06:11 -0700 |
---|---|---|
committer | Patrick Wendell <pwendell@gmail.com> | 2014-08-26 23:06:21 -0700 |
commit | e7672f19674c37fbd1a43fb3793b69097349bca1 (patch) | |
tree | fa34ecc4aa86d13dac30867dc0413a64c8d752c6 /python | |
parent | 2381e90dc04932f10d4c4534dcb64168799dc8af (diff) | |
download | spark-e7672f19674c37fbd1a43fb3793b69097349bca1.tar.gz spark-e7672f19674c37fbd1a43fb3793b69097349bca1.tar.bz2 spark-e7672f19674c37fbd1a43fb3793b69097349bca1.zip |
[SPARK-3167] Handle special driver configs in Windows (Branch 1.1)
This is an effort to bring the Windows scripts up to speed after the recent sweeping changes in #1845.
Author: Andrew Or <andrewor14@gmail.com>
Closes #2156 from andrewor14/windows-config-branch-1.1 and squashes the following commits:
00b9dfe [Andrew Or] [SPARK-3167] Handle special driver configs in Windows
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/java_gateway.py | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 6f4f62f23b..9c70fa5c16 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -15,6 +15,7 @@ # limitations under the License. # +import atexit import os import sys import signal @@ -69,6 +70,22 @@ def launch_gateway(): error_msg += "--------------------------------------------------------------\n" raise Exception(error_msg) + # In Windows, ensure the Java child processes do not linger after Python has exited. + # In UNIX-based systems, the child process can kill itself on broken pipe (i.e. when + # the parent process' stdin sends an EOF). In Windows, however, this is not possible + # because java.lang.Process reads directly from the parent process' stdin, contending + # with any opportunity to read an EOF from the parent. Note that this is only best + # effort and will not take effect if the python process is violently terminated. + if on_windows: + # In Windows, the child process here is "spark-submit.cmd", not the JVM itself + # (because the UNIX "exec" command is not available). This means we cannot simply + # call proc.kill(), which kills only the "spark-submit.cmd" process but not the + # JVMs. Instead, we use "taskkill" with the tree-kill option "/t" to terminate all + # child processes in the tree (http://technet.microsoft.com/en-us/library/bb491009.aspx) + def killChild(): + Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid", str(proc.pid)]) + atexit.register(killChild) + # Create a thread to echo output from the GatewayServer, which is required # for Java log output to show up: class EchoOutputThread(Thread): |