From e7672f19674c37fbd1a43fb3793b69097349bca1 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 26 Aug 2014 23:06:11 -0700 Subject: [SPARK-3167] Handle special driver configs in Windows (Branch 1.1) This is an effort to bring the Windows scripts up to speed after recent splashing changes in #1845. Author: Andrew Or Closes #2156 from andrewor14/windows-config-branch-1.1 and squashes the following commits: 00b9dfe [Andrew Or] [SPARK-3167] Handle special driver configs in Windows --- python/pyspark/java_gateway.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'python') diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 6f4f62f23b..9c70fa5c16 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -15,6 +15,7 @@ # limitations under the License. # +import atexit import os import sys import signal @@ -69,6 +70,22 @@ def launch_gateway(): error_msg += "--------------------------------------------------------------\n" raise Exception(error_msg) + # In Windows, ensure the Java child processes do not linger after Python has exited. + # In UNIX-based systems, the child process can kill itself on broken pipe (i.e. when + # the parent process' stdin sends an EOF). In Windows, however, this is not possible + # because java.lang.Process reads directly from the parent process' stdin, contending + # with any opportunity to read an EOF from the parent. Note that this is only best + # effort and will not take effect if the python process is violently terminated. + if on_windows: + # In Windows, the child process here is "spark-submit.cmd", not the JVM itself + # (because the UNIX "exec" command is not available). This means we cannot simply + # call proc.kill(), which kills only the "spark-submit.cmd" process but not the + # JVMs. Instead, we use "taskkill" with the tree-kill option "/t" to terminate all + # child processes in the tree (http://technet.microsoft.com/en-us/library/bb491009.aspx) + def killChild(): + Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid", str(proc.pid)]) + atexit.register(killChild) + # Create a thread to echo output from the GatewayServer, which is required # for Java log output to show up: class EchoOutputThread(Thread): -- cgit v1.2.3