aboutsummaryrefslogtreecommitdiff
path: root/yarn
diff options
context:
space:
mode:
authorxuan <xuan@MacBook-Pro.local>2014-04-16 14:41:22 -0500
committerThomas Graves <tgraves@apache.org>2014-04-16 14:41:22 -0500
commit725925cf2120e998651f7d1406fdb34fc2405b9f (patch)
tree8e25e11e4b8de99d7179bd6dc6efc4e668b56fbe /yarn
parente269c24db7882ba05b26eff8fc6e1869103517f8 (diff)
downloadspark-725925cf2120e998651f7d1406fdb34fc2405b9f.tar.gz
spark-725925cf2120e998651f7d1406fdb34fc2405b9f.tar.bz2
spark-725925cf2120e998651f7d1406fdb34fc2405b9f.zip
SPARK-1465: Spark compilation is broken with the latest hadoop-2.4.0 release
YARN-1824 changes the APIs (addToEnvironment, setEnvFromInputString) in Apps, which causes the spark build to break if built against a version 2.4.0. To fix this, create the spark own function to do that functionality which will not break compiling against 2.3 and other 2.x versions. Author: xuan <xuan@MacBook-Pro.local> Author: xuan <xuan@macbook-pro.home> Closes #396 from xgong/master and squashes the following commits: 42b5984 [xuan] Remove two extra imports bc0926f [xuan] Remove usage of org.apache.hadoop.util.Shell be89fa7 [xuan] fix Spark compilation is broken with the latest hadoop-2.4.0 release
Diffstat (limited to 'yarn')
-rw-r--r--yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala33
-rw-r--r--yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala4
-rw-r--r--yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala63
3 files changed, 85 insertions, 15 deletions
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index eb42922aea..628dd98860 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -17,6 +17,7 @@
package org.apache.spark.deploy.yarn
+import java.io.File
import java.net.{InetAddress, UnknownHostException, URI}
import java.nio.ByteBuffer
@@ -280,7 +281,8 @@ trait ClientBase extends Logging {
distCacheMgr.setDistArchivesEnv(env)
// Allow users to specify some environment variables.
- Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV"))
+ YarnSparkHadoopUtil.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV"),
+ File.pathSeparator)
// Add each SPARK_* key to the environment.
System.getenv().filterKeys(_.startsWith("SPARK")).foreach { case (k,v) => env(k) = v }
@@ -382,7 +384,8 @@ object ClientBase {
YarnConfiguration.YARN_APPLICATION_CLASSPATH)).getOrElse(
getDefaultYarnApplicationClasspath())
for (c <- classpathEntries) {
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, c.trim)
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim,
+ File.pathSeparator)
}
val mrClasspathEntries = Option(conf.getStrings(
@@ -390,7 +393,8 @@ object ClientBase {
getDefaultMRApplicationClasspath())
if (mrClasspathEntries != null) {
for (c <- mrClasspathEntries) {
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, c.trim)
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim,
+ File.pathSeparator)
}
}
}
@@ -425,28 +429,29 @@ object ClientBase {
}
def populateClasspath(conf: Configuration, sparkConf: SparkConf, addLog4j: Boolean, env: HashMap[String, String]) {
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$())
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$(),
+ File.pathSeparator)
// If log4j present, ensure ours overrides all others
if (addLog4j) {
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
- Path.SEPARATOR + LOG4J_PROP)
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
+ Path.SEPARATOR + LOG4J_PROP, File.pathSeparator)
}
// Normally the users app.jar is last in case conflicts with spark jars
val userClasspathFirst = sparkConf.get("spark.yarn.user.classpath.first", "false")
.toBoolean
if (userClasspathFirst) {
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
- Path.SEPARATOR + APP_JAR)
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
+ Path.SEPARATOR + APP_JAR, File.pathSeparator)
}
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
- Path.SEPARATOR + SPARK_JAR)
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
+ Path.SEPARATOR + SPARK_JAR, File.pathSeparator)
ClientBase.populateHadoopClasspath(conf, env)
if (!userClasspathFirst) {
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
- Path.SEPARATOR + APP_JAR)
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
+ Path.SEPARATOR + APP_JAR, File.pathSeparator)
}
- Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
- Path.SEPARATOR + "*")
+ YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() +
+ Path.SEPARATOR + "*", File.pathSeparator)
}
}
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
index b3696c5fe7..9159cc4ad5 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
@@ -17,6 +17,7 @@
package org.apache.spark.deploy.yarn
+import java.io.File
import java.net.URI
import java.nio.ByteBuffer
import java.security.PrivilegedExceptionAction
@@ -167,7 +168,8 @@ trait ExecutorRunnableUtil extends Logging {
ClientBase.populateClasspath(yarnConf, sparkConf, System.getenv("SPARK_YARN_LOG4J_PATH") != null, env)
// Allow users to specify some environment variables
- Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV"))
+ YarnSparkHadoopUtil.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV"),
+ File.pathSeparator)
System.getenv().filterKeys(_.startsWith("SPARK")).foreach { case (k,v) => env(k) = v }
env
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
index 314a7550ad..4ceed95a25 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
@@ -17,10 +17,16 @@
package org.apache.spark.deploy.yarn
+import java.util.regex.Matcher
+import java.util.regex.Pattern
+
+import scala.collection.mutable.HashMap
+
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.UserGroupInformation
+import org.apache.hadoop.util.StringInterner
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.api.ApplicationConstants
import org.apache.hadoop.conf.Configuration
@@ -73,4 +79,61 @@ object YarnSparkHadoopUtil {
def getLoggingArgsForContainerCommandLine(): String = {
"-Dlog4j.configuration=log4j-spark-container.properties"
}
+
+ def addToEnvironment(
+ env: HashMap[String, String],
+ variable: String,
+ value: String,
+ classPathSeparator: String) = {
+ var envVariable = ""
+ if (env.get(variable) == None) {
+ envVariable = value
+ } else {
+ envVariable = env.get(variable).get + classPathSeparator + value
+ }
+ env put (StringInterner.weakIntern(variable), StringInterner.weakIntern(envVariable))
+ }
+
+ def setEnvFromInputString(
+ env: HashMap[String, String],
+ envString: String,
+ classPathSeparator: String) = {
+ if (envString != null && envString.length() > 0) {
+ var childEnvs = envString.split(",")
+ var p = Pattern.compile(getEnvironmentVariableRegex())
+ for (cEnv <- childEnvs) {
+ var parts = cEnv.split("=") // split on '='
+ var m = p.matcher(parts(1))
+ val sb = new StringBuffer
+ while (m.find()) {
+ val variable = m.group(1)
+ var replace = ""
+ if (env.get(variable) != None) {
+ replace = env.get(variable).get
+ } else {
+ // if this key is not configured for the child .. get it
+ // from the env
+ replace = System.getenv(variable)
+ if (replace == null) {
+ // the env key is note present anywhere .. simply set it
+ replace = ""
+ }
+ }
+ m.appendReplacement(sb, Matcher.quoteReplacement(replace))
+ }
+ m.appendTail(sb)
+ addToEnvironment(env, parts(0), sb.toString(), classPathSeparator)
+ }
+ }
+ }
+
+ private def getEnvironmentVariableRegex() : String = {
+ val osName = System.getProperty("os.name")
+ if (osName startsWith "Windows") {
+ "%([A-Za-z_][A-Za-z0-9_]*?)%"
+ } else {
+ "\\$([A-Za-z_][A-Za-z0-9_]*)"
+ }
+ }
+
}