author     Burak Yavuz <brkyvz@gmail.com>             2015-02-17 17:15:43 -0800
committer  Patrick Wendell <patrick@databricks.com>   2015-02-17 17:23:22 -0800
commit     ae6cfb3acdbc2721d25793698a4a440f0519dbec (patch)
tree       4dba3eaff24a4d042ac6e9e0a3e1b8c5c6108f14 /core
parent     c3d2b90bde2e11823909605d518167548df66bd8 (diff)
[SPARK-5811] Added documentation for maven coordinates and added Spark Packages support
Documentation for maven coordinates + Spark Package support. Added pyspark tests for `--packages`.

Author: Burak Yavuz <brkyvz@gmail.com>
Author: Davies Liu <davies@databricks.com>

Closes #4662 from brkyvz/SPARK-5811 and squashes the following commits:

56ccccd [Burak Yavuz] fixed broken test
64cb8ee [Burak Yavuz] passed pep8 on local
c07b81e [Burak Yavuz] fixed pep8
a8bd6b7 [Burak Yavuz] submit PR
4ef4046 [Burak Yavuz] ready for PR
8fb02e5 [Burak Yavuz] merged master
25c9b9f [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into python-jar
560d13b [Burak Yavuz] before PR
17d3f76 [Davies Liu] support .jar as python package
a3eb717 [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into SPARK-5811
c60156d [Burak Yavuz] [SPARK-5811] Added documentation for maven coordinates
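For context, this is the workflow the change documents: dependencies passed via `--packages` are resolved through Ivy and appended to the submitted jars (and, per the first hunk below, to the Python file list for Python applications). A hypothetical invocation could look like the following; the coordinate `com.example:my-lib:0.1.0` and the application name are illustrative only, and the Spark Packages form `com.example/my-lib:0.1.0` is accepted as an equivalent spelling:

    ./bin/spark-submit --packages com.example:my-lib:0.1.0 my_app.py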
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala           | 52
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala | 13
2 files changed, 45 insertions(+), 20 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 012a89a31b..4c4110812e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -252,6 +252,26 @@ object SparkSubmit {
val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER
+ // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
+ // too for packages that include Python code
+ val resolvedMavenCoordinates =
+ SparkSubmitUtils.resolveMavenCoordinates(
+ args.packages, Option(args.repositories), Option(args.ivyRepoPath))
+ if (!resolvedMavenCoordinates.trim.isEmpty) {
+ if (args.jars == null || args.jars.trim.isEmpty) {
+ args.jars = resolvedMavenCoordinates
+ } else {
+ args.jars += s",$resolvedMavenCoordinates"
+ }
+ if (args.isPython) {
+ if (args.pyFiles == null || args.pyFiles.trim.isEmpty) {
+ args.pyFiles = resolvedMavenCoordinates
+ } else {
+ args.pyFiles += s",$resolvedMavenCoordinates"
+ }
+ }
+ }
+
// Require all python files to be local, so we can add them to the PYTHONPATH
// In YARN cluster mode, python files are distributed as regular files, which can be non-local
if (args.isPython && !isYarnCluster) {
@@ -307,18 +327,6 @@ object SparkSubmit {
// Special flag to avoid deprecation warnings at the client
sysProps("SPARK_SUBMIT") = "true"
- // Resolve maven dependencies if there are any and add classpath to jars
- val resolvedMavenCoordinates =
- SparkSubmitUtils.resolveMavenCoordinates(
- args.packages, Option(args.repositories), Option(args.ivyRepoPath))
- if (!resolvedMavenCoordinates.trim.isEmpty) {
- if (args.jars == null || args.jars.trim.isEmpty) {
- args.jars = resolvedMavenCoordinates
- } else {
- args.jars += s",$resolvedMavenCoordinates"
- }
- }
-
// A list of rules to map each argument to system properties or command-line options in
// each deploy mode; we iterate through these below
val options = List[OptionAssigner](
@@ -646,13 +654,15 @@ private[spark] object SparkSubmitUtils {
private[spark] case class MavenCoordinate(groupId: String, artifactId: String, version: String)
/**
- * Extracts maven coordinates from a comma-delimited string
+ * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided
+ * in the format `groupId:artifactId:version` or `groupId/artifactId:version`. The latter provides
+ * simplicity for Spark Package users.
* @param coordinates Comma-delimited string of maven coordinates
* @return Sequence of Maven coordinates
*/
private[spark] def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = {
coordinates.split(",").map { p =>
- val splits = p.split(":")
+ val splits = p.replace("/", ":").split(":")
require(splits.length == 3, s"Provided Maven Coordinates must be in the form " +
s"'groupId:artifactId:version'. The coordinate provided is: $p")
require(splits(0) != null && splits(0).trim.nonEmpty, s"The groupId cannot be null or " +
@@ -682,6 +692,13 @@ private[spark] object SparkSubmitUtils {
br.setName("central")
cr.add(br)
+ val sp: IBiblioResolver = new IBiblioResolver
+ sp.setM2compatible(true)
+ sp.setUsepoms(true)
+ sp.setRoot("http://dl.bintray.com/spark-packages/maven")
+ sp.setName("spark-packages")
+ cr.add(sp)
+
val repositoryList = remoteRepos.getOrElse("")
// add any other remote repositories other than maven central
if (repositoryList.trim.nonEmpty) {
@@ -794,14 +811,19 @@ private[spark] object SparkSubmitUtils {
val md = getModuleDescriptor
md.setDefaultConf(ivyConfName)
- // Add an exclusion rule for Spark
+ // Add an exclusion rule for Spark and Scala Library
val sparkArtifacts = new ArtifactId(new ModuleId("org.apache.spark", "*"), "*", "*", "*")
val sparkDependencyExcludeRule =
new DefaultExcludeRule(sparkArtifacts, ivySettings.getMatcher("glob"), null)
sparkDependencyExcludeRule.addConfiguration(ivyConfName)
+ val scalaArtifacts = new ArtifactId(new ModuleId("*", "scala-library"), "*", "*", "*")
+ val scalaDependencyExcludeRule =
+ new DefaultExcludeRule(scalaArtifacts, ivySettings.getMatcher("glob"), null)
+ scalaDependencyExcludeRule.addConfiguration(ivyConfName)
// Exclude any Spark dependencies, and add all supplied maven artifacts as dependencies
md.addExcludeRule(sparkDependencyExcludeRule)
+ md.addExcludeRule(scalaDependencyExcludeRule)
addDependenciesToIvy(md, artifacts, ivyConfName)
// resolve dependencies
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
index 5366535001..ad62b35f62 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
@@ -57,20 +57,23 @@ class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll {
test("create repo resolvers") {
val resolver1 = SparkSubmitUtils.createRepoResolvers(None)
- // should have central by default
- assert(resolver1.getResolvers.size() === 1)
+ // should have central and spark-packages by default
+ assert(resolver1.getResolvers.size() === 2)
assert(resolver1.getResolvers.get(0).asInstanceOf[IBiblioResolver].getName === "central")
+ assert(resolver1.getResolvers.get(1).asInstanceOf[IBiblioResolver].getName === "spark-packages")
val repos = "a/1,b/2,c/3"
val resolver2 = SparkSubmitUtils.createRepoResolvers(Option(repos))
- assert(resolver2.getResolvers.size() === 4)
+ assert(resolver2.getResolvers.size() === 5)
val expected = repos.split(",").map(r => s"$r/")
resolver2.getResolvers.toArray.zipWithIndex.foreach { case (resolver: IBiblioResolver, i) =>
if (i == 0) {
assert(resolver.getName === "central")
+ } else if (i == 1) {
+ assert(resolver.getName === "spark-packages")
} else {
- assert(resolver.getName === s"repo-$i")
- assert(resolver.getRoot === expected(i - 1))
+ assert(resolver.getName === s"repo-${i - 1}")
+ assert(resolver.getRoot === expected(i - 2))
}
}
}
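As a standalone illustration of the coordinate parsing documented in extractMavenCoordinates above, here is a minimal Scala sketch of the same idea; the Coordinate case class, the parseCoordinates name, and the sample inputs are illustrative and not part of the patch:

    // Minimal sketch: parse a comma-delimited coordinate string, accepting both
    // "groupId:artifactId:version" and the Spark Packages form "groupId/artifactId:version".
    case class Coordinate(groupId: String, artifactId: String, version: String)

    def parseCoordinates(coordinates: String): Seq[Coordinate] = {
      coordinates.split(",").map { p =>
        // Normalize the "/" separator to ":" before splitting, as the patch does
        val splits = p.replace("/", ":").split(":")
        require(splits.length == 3,
          s"Coordinates must be in the form 'groupId:artifactId:version'; got: $p")
        Coordinate(splits(0).trim, splits(1).trim, splits(2).trim)
      }
    }

    // Example: both forms yield the same kind of coordinate
    // parseCoordinates("com.example:my-lib:0.1.0,com.example/other-lib:2.0")
    //   => Seq(Coordinate(com.example, my-lib, 0.1.0), Coordinate(com.example, other-lib, 2.0))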