author    Thomas Graves <tgraves@apache.org>  2014-08-05 15:57:32 -0500
committer Thomas Graves <tgraves@apache.org>  2014-08-05 15:57:32 -0500
commit    41e0a21b22ccd2788dc079790788e505b0d4e37d
tree      2d211afa9ebaf49101b1b4de44ce47f83033d4c5
parent    74f82c71b03d265a7d0c98ce196ca8c44de002e8
SPARK-1680: use configs for specifying environment variables on YARN
Note that this also documents spark.executorEnv.* which to me means it's public. If we don't want that please speak up.

Author: Thomas Graves <tgraves@apache.org>

Closes #1512 from tgravescs/SPARK-1680 and squashes the following commits:

11525df [Thomas Graves] more doc changes
553bad0 [Thomas Graves] fix documentation
152bf7c [Thomas Graves] fix docs
5382326 [Thomas Graves] try fix docs
32f86a4 [Thomas Graves] use configs for specifying environment variables on YARN
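As a usage illustration (MY_VAR below is a hypothetical name, not part of this patch), the new configs are set like any other Spark property:

import org.apache.spark.SparkConf

// Hypothetical example: one environment variable for the executors and one
// for the YARN Application Master, via the two configs this patch adds.
val conf = new SparkConf()
  .set("spark.executorEnv.MY_VAR", "some-value")
  .set("spark.yarn.appMasterEnv.MY_VAR", "some-value")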
-rw-r--r--  docs/configuration.md                                                                8
-rw-r--r--  docs/running-on-yarn.md                                                             22
-rw-r--r--  yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala            13
-rw-r--r--  yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala   6
4 files changed, 43 insertions, 6 deletions
diff --git a/docs/configuration.md b/docs/configuration.md
index 25adea210c..5e7556c08e 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -206,6 +206,14 @@ Apart from these, the following properties are also available, and may be useful
used during aggregation goes above this amount, it will spill the data into disks.
</td>
</tr>
+<tr>
+ <td><code>spark.executorEnv.[EnvironmentVariableName]</code></td>
+ <td>(none)</td>
+ <td>
+ Add the environment variable specified by <code>EnvironmentVariableName</code> to the Executor
+ process. The user can specify multiple of these to set multiple environment variables.
+ </td>
+</tr>
</table>
#### Shuffle Behavior
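The spark.executorEnv.* entry documented above maps config keys onto executor environment variables. A minimal sketch of that mapping, as an illustration of the idea rather than Spark's internal code:

import org.apache.spark.SparkConf

// Sketch only: collect (NAME, value) pairs from every spark.executorEnv.NAME key.
def executorEnvFromConf(conf: SparkConf): Seq[(String, String)] = {
  val prefix = "spark.executorEnv."
  conf.getAll.toSeq.collect {
    case (k, v) if k.startsWith(prefix) => (k.stripPrefix(prefix), v)
  }
}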
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 573930dbf4..9bc20dbf92 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -17,10 +17,6 @@ To build Spark yourself, refer to the [building with Maven guide](building-with-
Most of the configs are the same for Spark on YARN as for other deployment modes. See the [configuration page](configuration.html) for more information on those. These are configs that are specific to Spark on YARN.
-#### Environment Variables
-
-* `SPARK_YARN_USER_ENV`, to add environment variables to the Spark processes launched on YARN. This can be a comma separated list of environment variables, e.g. `SPARK_YARN_USER_ENV="JAVA_HOME=/jdk64,FOO=bar"`.
-
#### Spark Properties
<table class="table">
@@ -110,7 +106,23 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
<td><code>spark.yarn.access.namenodes</code></td>
<td>(none)</td>
<td>
- A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have acess to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters.
+ A list of secure HDFS namenodes your Spark application is going to access. For
+ example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`.
+ The Spark application must have access to the namenodes listed and Kerberos must
+ be properly configured to be able to access them (either in the same realm or in
+ a trusted realm). Spark acquires security tokens for each of the namenodes so that
+ the Spark application can access those remote HDFS clusters.
+ </td>
+</tr>
+<tr>
+ <td><code>spark.yarn.appMasterEnv.[EnvironmentVariableName]</code></td>
+ <td>(none)</td>
+ <td>
+ Add the environment variable specified by <code>EnvironmentVariableName</code> to the
+ Application Master process launched on YARN. The user can specify multiple of
+ these to set multiple environment variables. In yarn-cluster mode this controls
+ the environment of the Spark driver and in yarn-client mode it only controls
+ the environment of the executor launcher.
</td>
</tr>
</table>
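Since yarn-cluster mode runs the driver inside the Application Master, a spark.yarn.appMasterEnv.* entry is what actually reaches the driver there. A hypothetical sanity check from driver code (MY_VAR is an illustrative name, not from this patch):

// If spark.yarn.appMasterEnv.MY_VAR was set, the yarn-cluster driver should see it.
sys.env.get("MY_VAR") match {
  case Some(value) => println(s"MY_VAR = $value")
  case None        => println("MY_VAR is not set in the driver environment")
}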
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 44e025b8f6..1da0a1b675 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -259,6 +259,14 @@ trait ClientBase extends Logging {
localResources
}
+ /** Get all application master environment variables set on this SparkConf */
+ def getAppMasterEnv: Seq[(String, String)] = {
+ val prefix = "spark.yarn.appMasterEnv."
+ sparkConf.getAll.filter { case (k, v) => k.startsWith(prefix) }
+ .map { case (k, v) => (k.substring(prefix.length), v) }
+ }
+
+
def setupLaunchEnv(
localResources: HashMap[String, LocalResource],
stagingDir: String): HashMap[String, String] = {
@@ -276,6 +284,11 @@ trait ClientBase extends Logging {
distCacheMgr.setDistFilesEnv(env)
distCacheMgr.setDistArchivesEnv(env)
+ getAppMasterEnv.foreach { case (key, value) =>
+ YarnSparkHadoopUtil.addToEnvironment(env, key, value, File.pathSeparator)
+ }
+
+ // Keep this for backwards compatibility but users should move to the config
sys.env.get("SPARK_YARN_USER_ENV").foreach { userEnvs =>
// Allow users to specify some environment variables.
YarnSparkHadoopUtil.setEnvFromInputString(env, userEnvs, File.pathSeparator)
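The block added above relies on the append-with-separator behavior of YarnSparkHadoopUtil.addToEnvironment. A minimal sketch of the assumed semantics, not the utility's verbatim code:

import scala.collection.mutable.HashMap

// Assumed semantics: extend an existing entry with the separator
// (File.pathSeparator in the calls above) instead of replacing it.
def addToEnvironment(env: HashMap[String, String], key: String,
                     value: String, separator: String): Unit = {
  env(key) = env.get(key).map(existing => existing + separator + value).getOrElse(value)
}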
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
index 4ba7133a95..71a9e42846 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
@@ -171,7 +171,11 @@ trait ExecutorRunnableUtil extends Logging {
val extraCp = sparkConf.getOption("spark.executor.extraClassPath")
ClientBase.populateClasspath(null, yarnConf, sparkConf, env, extraCp)
- // Allow users to specify some environment variables
+ sparkConf.getExecutorEnv.foreach { case (key, value) =>
+ YarnSparkHadoopUtil.addToEnvironment(env, key, value, File.pathSeparator)
+ }
+
+ // Keep this for backwards compatibility but users should move to the config
YarnSparkHadoopUtil.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV"),
File.pathSeparator)
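For the legacy SPARK_YARN_USER_ENV path kept above, a rough sketch of the parsing it implies, assuming the documented comma-separated KEY=VALUE format (e.g. "JAVA_HOME=/jdk64,FOO=bar") rather than Spark's exact parser:

import scala.collection.mutable.HashMap

// Sketch: fold "K1=V1,K2=V2" into the environment map with the same
// append-with-separator semantics; malformed entries are skipped.
def setEnvFromInputString(env: HashMap[String, String],
                          input: String, separator: String): Unit = {
  Option(input).filter(_.nonEmpty).foreach { s =>
    s.split(",").foreach { pair =>
      pair.split("=", 2) match {
        case Array(k, v) => env(k) = env.get(k).map(_ + separator + v).getOrElse(v)
        case _           => // skip malformed entry
      }
    }
  }
}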