diff options
author    Sandy Ryza <sandy@cloudera.com>  2014-04-29 23:24:34 -0700
committer Patrick Wendell <pwendell@gmail.com>  2014-04-29 23:24:34 -0700
commit    ff5be9a41e52454e0f9cae83dd1fd50fbeaa684a (patch)
tree      5bd17eaa50b3120317370821104c9c6d9e238b56 /core
parent    7025dda8fa84b57d6f12bc770df2fa10eef21d88 (diff)
download  spark-ff5be9a41e52454e0f9cae83dd1fd50fbeaa684a.tar.gz
          spark-ff5be9a41e52454e0f9cae83dd1fd50fbeaa684a.tar.bz2
          spark-ff5be9a41e52454e0f9cae83dd1fd50fbeaa684a.zip
SPARK-1004. PySpark on YARN
This reopens https://github.com/apache/incubator-spark/pull/640 against the new repo
Author: Sandy Ryza <sandy@cloudera.com>
Closes #30 from sryza/sandy-spark-1004 and squashes the following commits:
89889d4 [Sandy Ryza] Move unzipping py4j to the generate-resources phase so that it gets included in the jar the first time
5165a02 [Sandy Ryza] Fix docs
fd0df79 [Sandy Ryza] PySpark on YARN
Diffstat (limited to 'core')
-rw-r--r--  core/pom.xml                                                              | 42
-rw-r--r--  core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala | 10
2 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/core/pom.xml b/core/pom.xml
index 73f573a414..822b5b1dd7 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -294,6 +294,48 @@
         </environmentVariables>
         </configuration>
       </plugin>
+      <!-- Unzip py4j so we can include its files in the jar -->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>exec-maven-plugin</artifactId>
+        <version>1.2.1</version>
+        <executions>
+          <execution>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>exec</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <executable>unzip</executable>
+          <workingDirectory>../python</workingDirectory>
+          <arguments>
+            <argument>-o</argument>
+            <argument>lib/py4j*.zip</argument>
+            <argument>-d</argument>
+            <argument>build</argument>
+          </arguments>
+        </configuration>
+      </plugin>
     </plugins>
+
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+      </resource>
+      <resource>
+        <directory>../python</directory>
+        <includes>
+          <include>pyspark/*.py</include>
+        </includes>
+      </resource>
+      <resource>
+        <directory>../python/build</directory>
+        <includes>
+          <include>py4j/*.py</include>
+        </includes>
+      </resource>
+    </resources>
   </build>
 </project>
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index a5f0f3d5e7..02799ce009 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -78,12 +78,9 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
       serverSocket = new ServerSocket(0, 1, InetAddress.getByAddress(Array(127, 0, 0, 1)))

       // Create and start the worker
-      val sparkHome = new ProcessBuilder().environment().get("SPARK_HOME")
-      val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/worker.py"))
+      val pb = new ProcessBuilder(Seq(pythonExec, "-m", "pyspark.worker"))
       val workerEnv = pb.environment()
       workerEnv.putAll(envVars)
-      val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH")
-      workerEnv.put("PYTHONPATH", pythonPath)
       val worker = pb.start()

       // Redirect the worker's stderr to ours
@@ -154,12 +151,9 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
     try {
       // Create and start the daemon
-      val sparkHome = new ProcessBuilder().environment().get("SPARK_HOME")
-      val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/daemon.py"))
+      val pb = new ProcessBuilder(Seq(pythonExec, "-m", "pyspark.daemon"))
       val workerEnv = pb.environment()
       workerEnv.putAll(envVars)
-      val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH")
-      workerEnv.put("PYTHONPATH", pythonPath)
       daemon = pb.start()

       // Redirect the stderr to ours