author    Burak Yavuz <brkyvz@gmail.com>        2015-08-03 17:42:03 -0700
committer Marcelo Vanzin <vanzin@cloudera.com>  2015-08-03 17:42:03 -0700
commit    1633d0a2612d94151f620c919425026150e69ae1
tree      f9285012a451589d398920df71d048a8dad05bc3
parent    b79b4f5f2251ed7efeec1f4b26e45a8ea6b85a6a
[SPARK-9263] Added flags to exclude dependencies when using --packages
While the functionality is there to exclude packages, there are no flags that allow users to exclude dependencies in case of dependency conflicts. We should provide users with a flag to add dependency exclusions in case the packages are not resolved properly (or are not available due to licensing). The flag I added was --packages-exclude, but I'm open to renaming it. I also added property flags in case people would like to use a conf file to provide dependencies, which is useful when there is a long list of dependencies or exclusions.

cc andrewor14 vanzin pwendell

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #7599 from brkyvz/packages-exclusions and squashes the following commits:

636f410 [Burak Yavuz] addressed nits
6e54ede [Burak Yavuz] is this the culprit
b5e508e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into packages-exclusions
154f5db [Burak Yavuz] addressed initial comments
1536d7a [Burak Yavuz] Added flags to exclude packages using --packages-exclude
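A usage sketch of the new flag (the coordinates reuse the placeholder artifacts from the test suite below; MyApp and my-app.jar are likewise illustrative):

  spark-submit \
    --packages my.great.lib:mylib:0.1 \
    --exclude-packages my.great.dep:mydep \
    --class MyApp my-app.jar

Any transitive dependency matching an excluded groupId:artifactId is dropped during Ivy resolution instead of being added to the classpath and py-files.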
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala                 | 29
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala        | 11
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala       | 30
-rw-r--r--  launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java |  2
4 files changed, 57 insertions(+), 15 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 0b39ee8fe3..31185c8e77 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction
import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
+import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.Path
import org.apache.hadoop.security.UserGroupInformation
import org.apache.ivy.Ivy
@@ -37,6 +38,7 @@ import org.apache.ivy.core.settings.IvySettings
import org.apache.ivy.plugins.matcher.GlobPatternMatcher
import org.apache.ivy.plugins.repository.file.FileRepository
import org.apache.ivy.plugins.resolver.{FileSystemResolver, ChainResolver, IBiblioResolver}
+
import org.apache.spark.api.r.RUtils
import org.apache.spark.SPARK_VERSION
import org.apache.spark.deploy.rest._
@@ -275,21 +277,18 @@ object SparkSubmit {
// Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
// too for packages that include Python code
- val resolvedMavenCoordinates =
- SparkSubmitUtils.resolveMavenCoordinates(
- args.packages, Option(args.repositories), Option(args.ivyRepoPath))
- if (!resolvedMavenCoordinates.trim.isEmpty) {
- if (args.jars == null || args.jars.trim.isEmpty) {
- args.jars = resolvedMavenCoordinates
+ val exclusions: Seq[String] =
+ if (!StringUtils.isBlank(args.packagesExclusions)) {
+ args.packagesExclusions.split(",")
} else {
- args.jars += s",$resolvedMavenCoordinates"
+ Nil
}
+ val resolvedMavenCoordinates = SparkSubmitUtils.resolveMavenCoordinates(args.packages,
+ Some(args.repositories), Some(args.ivyRepoPath), exclusions = exclusions)
+ if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
+ args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
if (args.isPython) {
- if (args.pyFiles == null || args.pyFiles.trim.isEmpty) {
- args.pyFiles = resolvedMavenCoordinates
- } else {
- args.pyFiles += s",$resolvedMavenCoordinates"
- }
+ args.pyFiles = mergeFileLists(args.pyFiles, resolvedMavenCoordinates)
}
}
@@ -736,7 +735,7 @@ object SparkSubmit {
* no files, into a single comma-separated string.
*/
private def mergeFileLists(lists: String*): String = {
- val merged = lists.filter(_ != null)
+ val merged = lists.filterNot(StringUtils.isBlank)
.flatMap(_.split(","))
.mkString(",")
if (merged == "") null else merged
@@ -938,7 +937,7 @@ private[spark] object SparkSubmitUtils {
// are supplied to spark-submit
val alternateIvyCache = ivyPath.getOrElse("")
val packagesDirectory: File =
- if (alternateIvyCache.trim.isEmpty) {
+ if (alternateIvyCache == null || alternateIvyCache.trim.isEmpty) {
new File(ivySettings.getDefaultIvyUserDir, "jars")
} else {
ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache))
@@ -1010,7 +1009,7 @@ private[spark] object SparkSubmitUtils {
}
}
- private def createExclusion(
+ private[deploy] def createExclusion(
coords: String,
ivySettings: IvySettings,
ivyConfName: String): ExcludeRule = {
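For reference, the exclusion handling added above reduces to the following standalone sketch (illustrative only: packagesExclusions stands in for args.packagesExclusions, and the mergeFileLists results are worked examples, not part of the patch):

  import org.apache.commons.lang3.StringUtils

  // e.g. the value passed via --exclude-packages
  val packagesExclusions: String = "my.great.dep:mydep"

  // blank or null input yields no exclusions; otherwise split on commas
  val exclusions: Seq[String] =
    if (!StringUtils.isBlank(packagesExclusions)) packagesExclusions.split(",") else Nil

  // mergeFileLists now drops blank entries as well as nulls before joining:
  //   mergeFileLists("a.jar,b.jar", null) == "a.jar,b.jar"
  //   mergeFileLists(null, " ")           == null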
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index b3710073e3..44852ce4e8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -59,6 +59,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
var packages: String = null
var repositories: String = null
var ivyRepoPath: String = null
+ var packagesExclusions: String = null
var verbose: Boolean = false
var isPython: Boolean = false
var pyFiles: String = null
@@ -172,6 +173,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
name = Option(name).orElse(sparkProperties.get("spark.app.name")).orNull
jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull
ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull
+ packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull
+ packagesExclusions = Option(packagesExclusions)
+ .orElse(sparkProperties.get("spark.jars.excludes")).orNull
deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull
numExecutors = Option(numExecutors)
.getOrElse(sparkProperties.get("spark.executor.instances").orNull)
@@ -299,6 +303,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
| childArgs [${childArgs.mkString(" ")}]
| jars $jars
| packages $packages
+ | packagesExclusions $packagesExclusions
| repositories $repositories
| verbose $verbose
|
@@ -391,6 +396,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
case PACKAGES =>
packages = value
+ case PACKAGES_EXCLUDE =>
+ packagesExclusions = value
+
case REPOSITORIES =>
repositories = value
@@ -482,6 +490,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
| maven repo, then maven central and any additional remote
| repositories given by --repositories. The format for the
| coordinates should be groupId:artifactId:version.
+ | --exclude-packages Comma-separated list of groupId:artifactId, to exclude while
+ | resolving the dependencies provided in --packages to avoid
+ | dependency conflicts.
| --repositories Comma-separated list of additional remote repositories to
| search for the maven coordinates given with --packages.
| --py-files PY_FILES Comma-separated list of .zip, .egg, or .py files to place
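As the commit message notes, the new spark.jars.packages and spark.jars.excludes properties let the same configuration live in a conf file; a sketch, with an illustrative file name:

  # my-app.conf
  spark.jars.packages  my.great.lib:mylib:0.1
  spark.jars.excludes  my.great.dep:mydep

  spark-submit --properties-file my-app.conf --class MyApp my-app.jar

Command-line values still win: the properties are only consulted when the corresponding option is unset, via the Option(...).orElse(...) fallbacks added above.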
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
index 01ece1a10f..63c346c1b8 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
@@ -95,6 +95,25 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
assert(md.getDependencies.length === 2)
}
+ test("excludes works correctly") {
+ val md = SparkSubmitUtils.getModuleDescriptor
+ val excludes = Seq("a:b", "c:d")
+ excludes.foreach { e =>
+ md.addExcludeRule(SparkSubmitUtils.createExclusion(e + ":*", new IvySettings, "default"))
+ }
+ val rules = md.getAllExcludeRules
+ assert(rules.length === 2)
+ val rule1 = rules(0).getId.getModuleId
+ assert(rule1.getOrganisation === "a")
+ assert(rule1.getName === "b")
+ val rule2 = rules(1).getId.getModuleId
+ assert(rule2.getOrganisation === "c")
+ assert(rule2.getName === "d")
+ intercept[IllegalArgumentException] {
+ SparkSubmitUtils.createExclusion("e:f:g:h", new IvySettings, "default")
+ }
+ }
+
test("ivy path works correctly") {
val md = SparkSubmitUtils.getModuleDescriptor
val artifacts = for (i <- 0 until 3) yield new MDArtifact(md, s"jar-$i", "jar", "jar")
@@ -168,4 +187,15 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact")
}
}
+
+ test("exclude dependencies end to end") {
+ val main = new MavenCoordinate("my.great.lib", "mylib", "0.1")
+ val dep = "my.great.dep:mydep:0.5"
+ IvyTestUtils.withRepository(main, Some(dep), None) { repo =>
+ val files = SparkSubmitUtils.resolveMavenCoordinates(main.toString,
+ Some(repo), None, Seq("my.great.dep:mydep"), isTest = true)
+ assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact")
+ assert(files.indexOf("my.great.dep") < 0, "Returned excluded artifact")
+ }
+ }
}
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java
index b88bba883a..5779eb3fc0 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java
@@ -51,6 +51,7 @@ class SparkSubmitOptionParser {
protected final String MASTER = "--master";
protected final String NAME = "--name";
protected final String PACKAGES = "--packages";
+ protected final String PACKAGES_EXCLUDE = "--exclude-packages";
protected final String PROPERTIES_FILE = "--properties-file";
protected final String PROXY_USER = "--proxy-user";
protected final String PY_FILES = "--py-files";
@@ -105,6 +106,7 @@ class SparkSubmitOptionParser {
{ NAME },
{ NUM_EXECUTORS },
{ PACKAGES },
+ { PACKAGES_EXCLUDE },
{ PRINCIPAL },
{ PROPERTIES_FILE },
{ PROXY_USER },