aboutsummaryrefslogtreecommitdiff
path: root/core/src
diff options
context:
space:
mode:
authorNezih Yigitbasi <nyigitbasi@netflix.com>2016-06-16 18:19:29 -0700
committerMarcelo Vanzin <vanzin@cloudera.com>2016-06-16 18:20:16 -0700
commit63470afc997fb9d6b6f8a911c25964743556c9cc (patch)
tree02588d1187e7e3ff855edf3f55f0d3a5aa10d39f /core/src
parentf1bf0d2f3a61d81686f36763e83d3be89c98435f (diff)
downloadspark-63470afc997fb9d6b6f8a911c25964743556c9cc.tar.gz
spark-63470afc997fb9d6b6f8a911c25964743556c9cc.tar.bz2
spark-63470afc997fb9d6b6f8a911c25964743556c9cc.zip
[SPARK-15782][YARN] Fix spark.jars and spark.yarn.dist.jars handling
When `--packages` is specified with spark-shell the classes from those packages cannot be found, which I think is due to some of the changes in SPARK-12343. Tested manually with both scala 2.10 and 2.11 repls. vanzin davies can you guys please review? Author: Marcelo Vanzin <vanzin@cloudera.com> Author: Nezih Yigitbasi <nyigitbasi@netflix.com> Closes #13709 from nezihyigitbasi/SPARK-15782.
Diffstat (limited to 'core/src')
-rw-r--r--core/src/main/scala/org/apache/spark/SparkContext.scala2
-rw-r--r--core/src/main/scala/org/apache/spark/util/Utils.scala25
-rw-r--r--core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala12
3 files changed, 38 insertions, 1 deletions
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d56946e932..d8701812eb 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -391,7 +391,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
_conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER)
- _jars = _conf.getOption("spark.jars").map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten
+ _jars = Utils.getUserJars(_conf)
_files = _conf.getOption("spark.files").map(_.split(",")).map(_.filter(_.nonEmpty))
.toSeq.flatten
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index f9d05409e1..17d193b773 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2352,6 +2352,31 @@ private[spark] object Utils extends Logging {
log.info(s"Started daemon with process name: ${Utils.getProcessName()}")
SignalUtils.registerLogger(log)
}
+
+ /**
+ * Unions two comma-separated lists of files and filters out empty strings.
+ */
+ def unionFileLists(leftList: Option[String], rightList: Option[String]): Set[String] = {
+ var allFiles = Set[String]()
+ leftList.foreach { value => allFiles ++= value.split(",") }
+ rightList.foreach { value => allFiles ++= value.split(",") }
+ allFiles.filter { _.nonEmpty }
+ }
+
+ /**
+ * In YARN mode this method returns a union of the jar files pointed by "spark.jars" and the
+ * "spark.yarn.dist.jars" properties, while in other modes it returns the jar files pointed by
+ * only the "spark.jars" property.
+ */
+ def getUserJars(conf: SparkConf): Seq[String] = {
+ val sparkJars = conf.getOption("spark.jars")
+ if (conf.get("spark.master") == "yarn") {
+ val yarnJars = conf.getOption("spark.yarn.dist.jars")
+ unionFileLists(sparkJars, yarnJars).toSeq
+ } else {
+ sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten
+ }
+ }
}
/**
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 2718976992..0b020592b0 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -570,6 +570,18 @@ class SparkSubmitSuite
appArgs.executorMemory should be ("2.3g")
}
}
+
+ test("comma separated list of files are unioned correctly") {
+ val left = Option("/tmp/a.jar,/tmp/b.jar")
+ val right = Option("/tmp/c.jar,/tmp/a.jar")
+ val emptyString = Option("")
+ Utils.unionFileLists(left, right) should be (Set("/tmp/a.jar", "/tmp/b.jar", "/tmp/c.jar"))
+ Utils.unionFileLists(emptyString, emptyString) should be (Set.empty)
+ Utils.unionFileLists(Option("/tmp/a.jar"), emptyString) should be (Set("/tmp/a.jar"))
+ Utils.unionFileLists(emptyString, Option("/tmp/a.jar")) should be (Set("/tmp/a.jar"))
+ Utils.unionFileLists(None, Option("/tmp/a.jar")) should be (Set("/tmp/a.jar"))
+ Utils.unionFileLists(Option("/tmp/a.jar"), None) should be (Set("/tmp/a.jar"))
+ }
// scalastyle:on println
// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.