SPARK-1064

This reopens PR 649 from incubator-spark against the new repo Author: Sandy Ryza <sandy@cloudera.com> Closes #102 from sryza/sandy-spark-1064 and squashes the following commits: 270e490 [Sandy Ryza] Handle different application classpath variables in different versions 88b04e0 [Sandy Ryza] SPARK-1064. Make it possible to run on YARN without bundling Hadoop jars in Spark assembly
author: Sandy Ryza <sandy@cloudera.com> 2014-03-11 22:39:17 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-03-11 22:39:17 -0700
commit: 2409af9dcf238e1ad87080a389e05a696c41dc72 (patch)
tree: 2c65a789a99b7e95389cf766140eecf55af99f31 /yarn
parent: 16788a654246067fd966033b5dc9bc0d4c759b70 (diff)
download: spark-2409af9dcf238e1ad87080a389e05a696c41dc72.tar.gz
spark-2409af9dcf238e1ad87080a389e05a696c41dc72.tar.bz2
spark-2409af9dcf238e1ad87080a389e05a696c41dc72.zip
1 files changed, 42 insertions, 1 deletions
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 4b6c7db836..74de4293d9 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -29,8 +29,10 @@ import org.apache.hadoop.fs._
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.io.DataOutputBuffer
 import org.apache.hadoop.mapred.Master
+import org.apache.hadoop.mapreduce.MRJobConfig
 import org.apache.hadoop.net.NetUtils
 import org.apache.hadoop.security.UserGroupInformation
+import org.apache.hadoop.util.StringUtils
 import org.apache.hadoop.yarn.api._
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
 import org.apache.hadoop.yarn.api.protocolrecords._
@@ -379,9 +381,48 @@ object ClientBase {
 
   // Based on code from org.apache.hadoop.mapreduce.v2.util.MRApps
   def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) {
-    for (c <- conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH)) {
+    val classpathEntries = Option(conf.getStrings(
+      YarnConfiguration.YARN_APPLICATION_CLASSPATH)).getOrElse(
+        getDefaultYarnApplicationClasspath())
+    for (c <- classpathEntries) {
       Apps.addToEnvironment(env, Environment.CLASSPATH.name, c.trim)
     }
+
+    val mrClasspathEntries = Option(conf.getStrings(
+      "mapreduce.application.classpath")).getOrElse(
+        getDefaultMRApplicationClasspath())
+    if (mrClasspathEntries != null) {
+      for (c <- mrClasspathEntries) {
+        Apps.addToEnvironment(env, Environment.CLASSPATH.name, c.trim)
+      }
+    }
+  }
+
+  def getDefaultYarnApplicationClasspath(): Array[String] = {
+    try {
+      val field = classOf[MRJobConfig].getField("DEFAULT_YARN_APPLICATION_CLASSPATH")
+      field.get(null).asInstanceOf[Array[String]]
+    } catch {
+      case err: NoSuchFieldError => null
+    }
+  }
+
+  /**
+   * In Hadoop 0.23, the MR application classpath comes with the YARN application
+   * classpath.  In Hadoop 2.0, it's an array of Strings, and in 2.2+ it's a String.
+   * So we need to use reflection to retrieve it.
+   */
+  def getDefaultMRApplicationClasspath(): Array[String] = {
+    try {
+      val field = classOf[MRJobConfig].getField("DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH")
+      if (field.getType == classOf[String]) {
+        StringUtils.getStrings(field.get(null).asInstanceOf[String])
+      } else {
+        field.get(null).asInstanceOf[Array[String]]
+      }
+    } catch {
+      case err: NoSuchFieldError => null
+    }
   }
 
   def populateClasspath(conf: Configuration, sparkConf: SparkConf, addLog4j: Boolean, env: HashMap[String, String]) {
author	Sandy Ryza <sandy@cloudera.com>	2014-03-11 22:39:17 -0700
committer	Patrick Wendell <pwendell@gmail.com>	2014-03-11 22:39:17 -0700
commit	2409af9dcf238e1ad87080a389e05a696c41dc72 (patch)
tree	2c65a789a99b7e95389cf766140eecf55af99f31 /yarn
parent	16788a654246067fd966033b5dc9bc0d4c759b70 (diff)
download	spark-2409af9dcf238e1ad87080a389e05a696c41dc72.tar.gz spark-2409af9dcf238e1ad87080a389e05a696c41dc72.tar.bz2 spark-2409af9dcf238e1ad87080a389e05a696c41dc72.zip