aboutsummaryrefslogtreecommitdiff
path: root/yarn/src/main/scala
diff options
context:
space:
mode:
author KevinGrealish <KevinGre@microsoft.com> 2016-07-27 13:50:31 -0700
committer Marcelo Vanzin <vanzin@cloudera.com> 2016-07-27 13:50:31 -0700
commit b14d7b5cf4f173a1e45a4b1ae2a5e4e7ac5e9bb1 (patch)
tree c5bf2444ca8bee5fab106b9a10cd26bcd456d2fd /yarn/src/main/scala
parent bc4851adeb386edc5bef47027a12ca44eda82b09 (diff)
downloadspark-b14d7b5cf4f173a1e45a4b1ae2a5e4e7ac5e9bb1.tar.gz
spark-b14d7b5cf4f173a1e45a4b1ae2a5e4e7ac5e9bb1.tar.bz2
spark-b14d7b5cf4f173a1e45a4b1ae2a5e4e7ac5e9bb1.zip
[SPARK-16110][YARN][PYSPARK] Fix allowing python version to be specified per submit for cluster mode.
## What changes were proposed in this pull request?

This fix allows PySpark job submissions to specify Python 2 or 3. It changes the ordering in the setup of the application master environment so that the env vars PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON can be overridden by spark.yarn.appMasterEnv.* conf settings. This applies to YARN in cluster mode. It allows them to be set per submission without needing to unset the env vars (which is not always possible - e.g. batch submit with Livy only exposes the arguments to spark-submit).

## How was this patch tested?

Manual and existing unit tests.

Author: KevinGrealish <KevinGre@microsoft.com>

Closes #13824 from KevinGrealish/SPARK-16110.
Diffstat (limited to 'yarn/src/main/scala')
-rw-r--r-- yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 7
1 files changed, 5 insertions, 2 deletions
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 244d1a4e33..348f9bf94a 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -831,8 +831,11 @@ private[spark] class Client(
env("SPARK_JAVA_OPTS") = value
}
// propagate PYSPARK_DRIVER_PYTHON and PYSPARK_PYTHON to driver in cluster mode
- sys.env.get("PYSPARK_DRIVER_PYTHON").foreach(env("PYSPARK_DRIVER_PYTHON") = _)
- sys.env.get("PYSPARK_PYTHON").foreach(env("PYSPARK_PYTHON") = _)
+ Seq("PYSPARK_DRIVER_PYTHON", "PYSPARK_PYTHON").foreach { envname =>
+ if (!env.contains(envname)) {
+ sys.env.get(envname).foreach(env(envname) = _)
+ }
+ }
}
sys.env.get(ENV_DIST_CLASSPATH).foreach { dcp =>