diff options
author | Jeff Zhang <zjffdu@apache.org> | 2016-08-11 20:08:25 -0700 |
---|---|---|
committer | Marcelo Vanzin <vanzin@cloudera.com> | 2016-08-11 20:08:39 -0700 |
commit | 7a9e25c38380e6c62080d62ad38a4830e44fe753 (patch) | |
tree | c7ee1437a3dec8abc0fef57b10406a5bd0b72987 /core/src/main | |
parent | ea0bf91b4a2ca3ef472906e50e31fd6268b6f53e (diff) | |
download | spark-7a9e25c38380e6c62080d62ad38a4830e44fe753.tar.gz spark-7a9e25c38380e6c62080d62ad38a4830e44fe753.tar.bz2 spark-7a9e25c38380e6c62080d62ad38a4830e44fe753.zip |
[SPARK-13081][PYSPARK][SPARK_SUBMIT] Allow set pythonExec of driver and executor through conf…
Before this PR, user have to export environment variable to specify the python of driver & executor which is not so convenient for users. This PR is trying to allow user to specify python through configuration "--pyspark-driver-python" & "--pyspark-executor-python"
Manually test in local & yarn mode for pyspark-shell and pyspark batch mode.
Author: Jeff Zhang <zjffdu@apache.org>
Closes #13146 from zjffdu/SPARK-13081.
Diffstat (limited to 'core/src/main')
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala | 14 | ||||
-rw-r--r-- | core/src/main/scala/org/apache/spark/internal/config/package.scala | 8 |
2 files changed, 19 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 6227a30dc9..0b1cec2df8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -24,8 +24,9 @@ import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConverters._ import scala.util.Try -import org.apache.spark.SparkUserAppException +import org.apache.spark.{SparkConf, SparkUserAppException} import org.apache.spark.api.python.PythonUtils +import org.apache.spark.internal.config._ import org.apache.spark.util.{RedirectThread, Utils} /** @@ -37,8 +38,12 @@ object PythonRunner { val pythonFile = args(0) val pyFiles = args(1) val otherArgs = args.slice(2, args.length) - val pythonExec = - sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", "python")) + val sparkConf = new SparkConf() + val pythonExec = sparkConf.get(PYSPARK_DRIVER_PYTHON) + .orElse(sparkConf.get(PYSPARK_PYTHON)) + .orElse(sys.env.get("PYSPARK_DRIVER_PYTHON")) + .orElse(sys.env.get("PYSPARK_PYTHON")) + .getOrElse("python") // Format python file paths before adding them to the PYTHONPATH val formattedPythonFile = formatPath(pythonFile) @@ -77,6 +82,9 @@ object PythonRunner { // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u: env.put("PYTHONUNBUFFERED", "YES") // value is needed to be set to a non-empty string env.put("PYSPARK_GATEWAY_PORT", "" + gatewayServer.getListeningPort) + // pass conf spark.pyspark.python to python process, the only way to pass info to + // python process is through environment variable. + sparkConf.get(PYSPARK_PYTHON).foreach(env.put("PYSPARK_PYTHON", _)) builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize try { val process = builder.start() diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index e646d9964a..be3dac4d24 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -106,4 +106,12 @@ package object config { private[spark] val METRICS_NAMESPACE = ConfigBuilder("spark.metrics.namespace") .stringConf .createOptional + + private[spark] val PYSPARK_DRIVER_PYTHON = ConfigBuilder("spark.pyspark.driver.python") + .stringConf + .createOptional + + private[spark] val PYSPARK_PYTHON = ConfigBuilder("spark.pyspark.python") + .stringConf + .createOptional } |