From c1840a862eb548bc4306e53ee7e9f26986b31832 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Tue, 18 Aug 2015 11:36:36 -0700 Subject: [SPARK-7736] [CORE] Fix a race introduced in PythonRunner. The fix for SPARK-7736 introduced a race where a port value of "-1" could be passed down to the pyspark process, causing it to fail to connect back to the JVM. This change adds code to fix that race. Author: Marcelo Vanzin Closes #8258 from vanzin/SPARK-7736. --- core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'core') diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 4277ac2ad1..23d01e9cbb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -52,10 +52,16 @@ object PythonRunner { gatewayServer.start() } }) - thread.setName("py4j-gateway") + thread.setName("py4j-gateway-init") thread.setDaemon(true) thread.start() + // Wait until the gateway server has started, so that we know which port it is bound to. + // `gatewayServer.start()` will start a new thread and run the server code there, after + // initializing the socket, so the thread started above will end as soon as the server is + // ready to serve connections. + thread.join() + // Build up a PYTHONPATH that includes the Spark assembly JAR (where this class is), the // python directories in SPARK_HOME (if set), and any files in the pyFiles argument val pathElements = new ArrayBuffer[String] -- cgit v1.2.3