aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/java_gateway.py
diff options
context:
space:
mode:
author Andrew Or <andrewor14@gmail.com> 2014-08-26 22:52:16 -0700
committer Patrick Wendell <pwendell@gmail.com> 2014-08-26 22:52:16 -0700
commit 7557c4cfef2398d124b00472e2696f0559a36ef7 (patch)
tree c780f689efe09f3bc60077a62c9a43688c854279 /python/pyspark/java_gateway.py
parent bf719056b71d55e1194554661dfa194ed03d364d (diff)
download spark-7557c4cfef2398d124b00472e2696f0559a36ef7.tar.gz
spark-7557c4cfef2398d124b00472e2696f0559a36ef7.tar.bz2
spark-7557c4cfef2398d124b00472e2696f0559a36ef7.zip
[SPARK-3167] Handle special driver configs in Windows
This is an effort to bring the Windows scripts up to speed after recent splashing changes in #1845.

Author: Andrew Or <andrewor14@gmail.com>

Closes #2129 from andrewor14/windows-config and squashes the following commits:

881a8f0 [Andrew Or] Add reference to Windows taskkill
92e6047 [Andrew Or] Update a few comments (minor)
22b1acd [Andrew Or] Fix style again (minor)
afcffea [Andrew Or] Fix style (minor)
72004c2 [Andrew Or] Actually respect --driver-java-options
803218b [Andrew Or] Actually respect SPARK_*_CLASSPATH
eeb34a0 [Andrew Or] Update outdated comment (minor)
35caecc [Andrew Or] In Windows, actually kill Java processes on exit
f97daa2 [Andrew Or] Fix Windows spark shell stdin issue
83ebe60 [Andrew Or] Parse special driver configs in Windows (broken)
Diffstat (limited to 'python/pyspark/java_gateway.py')
-rw-r--r-- python/pyspark/java_gateway.py 17
1 files changed, 17 insertions, 0 deletions
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 6f4f62f23b..9c70fa5c16 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -15,6 +15,7 @@
# limitations under the License.
#
+import atexit
import os
import sys
import signal
@@ -69,6 +70,22 @@ def launch_gateway():
error_msg += "--------------------------------------------------------------\n"
raise Exception(error_msg)
+ # In Windows, ensure the Java child processes do not linger after Python has exited.
+ # In UNIX-based systems, the child process can kill itself on broken pipe (i.e. when
+ # the parent process' stdin sends an EOF). In Windows, however, this is not possible
+ # because java.lang.Process reads directly from the parent process' stdin, contending
+ # with any opportunity to read an EOF from the parent. Note that this is only best
+ # effort and will not take effect if the Python process is violently terminated.
+ if on_windows:
+ # In Windows, the child process here is "spark-submit.cmd", not the JVM itself
+ # (because the UNIX "exec" command is not available). This means we cannot simply
+ # call proc.kill(), which kills only the "spark-submit.cmd" process but not the
+ # JVMs. Instead, we use "taskkill" with the tree-kill option "/t" to terminate all
+ # child processes in the tree (http://technet.microsoft.com/en-us/library/bb491009.aspx)
+ def killChild():
+ Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid", str(proc.pid)])
+ atexit.register(killChild)
+
# Create a thread to echo output from the GatewayServer, which is required
# for Java log output to show up:
class EchoOutputThread(Thread):