aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/java_gateway.py
diff options
context:
space:
mode:
author: Kay Ousterhout <kayousterhout@gmail.com> 2014-06-18 13:16:26 -0700
committer: Kay Ousterhout <kayousterhout@gmail.com> 2014-06-18 13:16:26 -0700
commit: 3870248740d83b0292ccca88a494ce19783847f0 (patch)
tree: a619c9f31901a2f4f2f572ea9b5be75c23f537b1 /python/pyspark/java_gateway.py
parent: dd96fcda0145810785b67f847f98b04ff7f0d7c3 (diff)
download: spark-3870248740d83b0292ccca88a494ce19783847f0.tar.gz
spark-3870248740d83b0292ccca88a494ce19783847f0.tar.bz2
spark-3870248740d83b0292ccca88a494ce19783847f0.zip
[SPARK-1466] Raise exception if pyspark Gateway process doesn't start.
If the gateway process fails to start correctly (e.g., because JAVA_HOME isn't set correctly, there's no Spark jar, etc.), right now pyspark fails because of a very difficult-to-understand error, where we try to parse stdout to get the port where Spark started and there's nothing there. This commit properly catches the error and throws an exception that includes the stderr output for much easier debugging.

Thanks to @shivaram and @stogers for helping to fix this issue!

Author: Kay Ousterhout <kayousterhout@gmail.com>

Closes #383 from kayousterhout/pyspark and squashes the following commits:

36dd54b [Kay Ousterhout] [SPARK-1466] Raise exception if Gateway process doesn't start.
Diffstat (limited to 'python/pyspark/java_gateway.py')
-rw-r--r-- python/pyspark/java_gateway.py 15
1 files changed, 11 insertions, 4 deletions
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 91ae8263f6..19235d5f79 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -43,12 +43,19 @@ def launch_gateway():
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
- proc = Popen(command, stdout=PIPE, stdin=PIPE, preexec_fn=preexec_func)
+ proc = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE, preexec_fn=preexec_func)
else:
# preexec_fn not supported on Windows
- proc = Popen(command, stdout=PIPE, stdin=PIPE)
- # Determine which ephemeral port the server started on:
- gateway_port = int(proc.stdout.readline())
+ proc = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE)
+
+ try:
+ # Determine which ephemeral port the server started on:
+ gateway_port = int(proc.stdout.readline())
+ except:
+ error_code = proc.poll()
+ raise Exception("Launching GatewayServer failed with exit code %d: %s" %
+ (error_code, "".join(proc.stderr.readlines())))
+
# Create a thread to echo output from the GatewayServer, which is required
# for Java log output to show up:
class EchoOutputThread(Thread):