about | summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author Burak Yavuz <brkyvz@gmail.com> 2015-06-17 22:33:37 -0700
committer Andrew Or <andrew@databricks.com> 2015-06-17 22:33:37 -0700
commit 3b6107704fb946e9fcb8c1c9bc4ded1b88c571af (patch)
tree 1d5bd80cede3f34c29cf2e5202d36af72f486965 /core
parent e2cdb0568b14df29bbdb1ee9a13ee361c9ddad9c (diff)
download spark-3b6107704fb946e9fcb8c1c9bc4ded1b88c571af.tar.gz
spark-3b6107704fb946e9fcb8c1c9bc4ded1b88c571af.tar.bz2
spark-3b6107704fb946e9fcb8c1c9bc4ded1b88c571af.zip
[SPARK-8095] Resolve dependencies of --packages in local ivy cache
Dependencies of artifacts in the local ivy cache were not being resolved properly. The dependencies were not being picked up. Now they should be.

cc andrewor14

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #6788 from brkyvz/local-ivy-fix and squashes the following commits:

2875bf4 [Burak Yavuz] fix temp dir bug
48cc648 [Burak Yavuz] improve deletion
a69e3e6 [Burak Yavuz] delete cache before test as well
0037197 [Burak Yavuz] fix merge conflicts
f60772c [Burak Yavuz] use different folder for m2 cache during testing
b6ef038 [Burak Yavuz] [SPARK-8095] Resolve dependencies of Spark Packages in local ivy cache
Diffstat (limited to 'core')
-rw-r--r-- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 22
-rw-r--r-- core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala 124
-rw-r--r-- core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala 22
3 files changed, 135 insertions, 33 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index cfcc6d3558..abf222757a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -35,7 +35,8 @@ import org.apache.ivy.core.resolve.ResolveOptions
import org.apache.ivy.core.retrieve.RetrieveOptions
import org.apache.ivy.core.settings.IvySettings
import org.apache.ivy.plugins.matcher.GlobPatternMatcher
-import org.apache.ivy.plugins.resolver.{ChainResolver, IBiblioResolver}
+import org.apache.ivy.plugins.repository.file.FileRepository
+import org.apache.ivy.plugins.resolver.{FileSystemResolver, ChainResolver, IBiblioResolver}
import org.apache.spark.SPARK_VERSION
import org.apache.spark.deploy.rest._
import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, Utils}
@@ -735,8 +736,14 @@ private[spark] object SparkSubmitUtils {
}
/** Path of the local Maven cache. */
- private[spark] def m2Path: File = new File(System.getProperty("user.home"),
- ".m2" + File.separator + "repository" + File.separator)
+ private[spark] def m2Path: File = {
+ if (Utils.isTesting) {
+ // test builds delete the maven cache, and this can cause flakiness
+ new File("dummy", ".m2" + File.separator + "repository")
+ } else {
+ new File(System.getProperty("user.home"), ".m2" + File.separator + "repository")
+ }
+ }
/**
* Extracts maven coordinates from a comma-delimited string
@@ -756,12 +763,13 @@ private[spark] object SparkSubmitUtils {
localM2.setName("local-m2-cache")
cr.add(localM2)
- val localIvy = new IBiblioResolver
- localIvy.setRoot(new File(ivySettings.getDefaultIvyUserDir,
- "local" + File.separator).toURI.toString)
+ val localIvy = new FileSystemResolver
+ val localIvyRoot = new File(ivySettings.getDefaultIvyUserDir, "local")
+ localIvy.setLocal(true)
+ localIvy.setRepository(new FileRepository(localIvyRoot))
val ivyPattern = Seq("[organisation]", "[module]", "[revision]", "[type]s",
"[artifact](-[classifier]).[ext]").mkString(File.separator)
- localIvy.setPattern(ivyPattern)
+ localIvy.addIvyPattern(localIvyRoot.getAbsolutePath + File.separator + ivyPattern)
localIvy.setName("local-ivy-cache")
cr.add(localIvy)
diff --git a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala
index 7d39984424..823050b0aa 100644
--- a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala
@@ -24,6 +24,8 @@ import com.google.common.io.{Files, ByteStreams}
import org.apache.commons.io.FileUtils
+import org.apache.ivy.core.settings.IvySettings
+
import org.apache.spark.TestUtils.{createCompiledClass, JavaSourceFromString}
import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
@@ -44,13 +46,30 @@ private[deploy] object IvyTestUtils {
if (!useIvyLayout) {
Seq(groupDirs, artifactDirs, artifact.version).mkString(File.separator)
} else {
- Seq(groupDirs, artifactDirs, artifact.version, ext + "s").mkString(File.separator)
+ Seq(artifact.groupId, artifactDirs, artifact.version, ext + "s").mkString(File.separator)
}
new File(prefix, artifactPath)
}
- private def artifactName(artifact: MavenCoordinate, ext: String = ".jar"): String = {
- s"${artifact.artifactId}-${artifact.version}$ext"
+ /** Returns the artifact naming based on standard ivy or maven format. */
+ private def artifactName(
+ artifact: MavenCoordinate,
+ useIvyLayout: Boolean,
+ ext: String = ".jar"): String = {
+ if (!useIvyLayout) {
+ s"${artifact.artifactId}-${artifact.version}$ext"
+ } else {
+ s"${artifact.artifactId}$ext"
+ }
+ }
+
+ /** Returns the directory for the given groupId based on standard ivy or maven format. */
+ private def getBaseGroupDirectory(artifact: MavenCoordinate, useIvyLayout: Boolean): String = {
+ if (!useIvyLayout) {
+ artifact.groupId.replace(".", File.separator)
+ } else {
+ artifact.groupId
+ }
}
/** Write the contents to a file to the supplied directory. */
@@ -92,6 +111,22 @@ private[deploy] object IvyTestUtils {
createCompiledClass(className, dir, sourceFile, Seq.empty)
}
+ private def createDescriptor(
+ tempPath: File,
+ artifact: MavenCoordinate,
+ dependencies: Option[Seq[MavenCoordinate]],
+ useIvyLayout: Boolean): File = {
+ if (useIvyLayout) {
+ val ivyXmlPath = pathFromCoordinate(artifact, tempPath, "ivy", true)
+ Files.createParentDirs(new File(ivyXmlPath, "dummy"))
+ createIvyDescriptor(ivyXmlPath, artifact, dependencies)
+ } else {
+ val pomPath = pathFromCoordinate(artifact, tempPath, "pom", useIvyLayout)
+ Files.createParentDirs(new File(pomPath, "dummy"))
+ createPom(pomPath, artifact, dependencies)
+ }
+ }
+
/** Helper method to write artifact information in the pom. */
private def pomArtifactWriter(artifact: MavenCoordinate, tabCount: Int = 1): String = {
var result = "\n" + " " * tabCount + s"<groupId>${artifact.groupId}</groupId>"
@@ -121,15 +156,55 @@ private[deploy] object IvyTestUtils {
"\n <dependencies>\n" + inside + "\n </dependencies>"
}.getOrElse("")
content += "\n</project>"
- writeFile(dir, artifactName(artifact, ".pom"), content.trim)
+ writeFile(dir, artifactName(artifact, false, ".pom"), content.trim)
+ }
+
+ /** Helper method to write artifact information in the ivy.xml. */
+ private def ivyArtifactWriter(artifact: MavenCoordinate): String = {
+ s"""<dependency org="${artifact.groupId}" name="${artifact.artifactId}"
+ | rev="${artifact.version}" force="true"
+ | conf="compile->compile(*),master(*);runtime->runtime(*)"/>""".stripMargin
+ }
+
+ /** Create the ivy.xml descriptor file for this artifact. */
+ private def createIvyDescriptor(
+ dir: File,
+ artifact: MavenCoordinate,
+ dependencies: Option[Seq[MavenCoordinate]]): File = {
+ var content = s"""
+ |<?xml version="1.0" encoding="UTF-8"?>
+ |<ivy-module version="2.0" xmlns:m="http://ant.apache.org/ivy/maven">
+ | <info organisation="${artifact.groupId}"
+ | module="${artifact.artifactId}"
+ | revision="${artifact.version}"
+ | status="release" publication="20150405222456" />
+ | <configurations>
+ | <conf name="default" visibility="public" description="" extends="runtime,master"/>
+ | <conf name="compile" visibility="public" description=""/>
+ | <conf name="master" visibility="public" description=""/>
+ | <conf name="runtime" visibility="public" description="" extends="compile"/>
+ | <conf name="pom" visibility="public" description=""/>
+ | </configurations>
+ | <publications>
+ | <artifact name="${artifactName(artifact, true, "")}" type="jar" ext="jar"
+ | conf="master"/>
+ | </publications>
+ """.stripMargin.trim
+ content += dependencies.map { deps =>
+ val inside = deps.map(ivyArtifactWriter).mkString("\n")
+ "\n <dependencies>\n" + inside + "\n </dependencies>"
+ }.getOrElse("")
+ content += "\n</ivy-module>"
+ writeFile(dir, "ivy.xml", content.trim)
}
/** Create the jar for the given maven coordinate, using the supplied files. */
private def packJar(
dir: File,
artifact: MavenCoordinate,
- files: Seq[(String, File)]): File = {
- val jarFile = new File(dir, artifactName(artifact))
+ files: Seq[(String, File)],
+ useIvyLayout: Boolean): File = {
+ val jarFile = new File(dir, artifactName(artifact, useIvyLayout))
val jarFileStream = new FileOutputStream(jarFile)
val jarStream = new JarOutputStream(jarFileStream, new java.util.jar.Manifest())
@@ -187,12 +262,10 @@ private[deploy] object IvyTestUtils {
} else {
Seq(javaFile)
}
- val jarFile = packJar(jarPath, artifact, allFiles)
+ val jarFile = packJar(jarPath, artifact, allFiles, useIvyLayout)
assert(jarFile.exists(), "Problem creating Jar file")
- val pomPath = pathFromCoordinate(artifact, tempPath, "pom", useIvyLayout)
- Files.createParentDirs(new File(pomPath, "dummy"))
- val pomFile = createPom(pomPath, artifact, dependencies)
- assert(pomFile.exists(), "Problem creating Pom file")
+ val descriptor = createDescriptor(tempPath, artifact, dependencies, useIvyLayout)
+ assert(descriptor.exists(), "Problem creating Pom file")
} finally {
FileUtils.deleteDirectory(root)
}
@@ -237,7 +310,10 @@ private[deploy] object IvyTestUtils {
dependencies: Option[String],
rootDir: Option[File],
useIvyLayout: Boolean = false,
- withPython: Boolean = false)(f: String => Unit): Unit = {
+ withPython: Boolean = false,
+ ivySettings: IvySettings = new IvySettings)(f: String => Unit): Unit = {
+ val deps = dependencies.map(SparkSubmitUtils.extractMavenCoordinates)
+ purgeLocalIvyCache(artifact, deps, ivySettings)
val repo = createLocalRepositoryForTests(artifact, dependencies, rootDir, useIvyLayout,
withPython)
try {
@@ -245,17 +321,29 @@ private[deploy] object IvyTestUtils {
} finally {
// Clean up
if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2")) {
- FileUtils.deleteDirectory(new File(repo,
- artifact.groupId.replace(".", File.separator) + File.separator + artifact.artifactId))
- dependencies.map(SparkSubmitUtils.extractMavenCoordinates).foreach { seq =>
- seq.foreach { dep =>
- FileUtils.deleteDirectory(new File(repo,
- dep.artifactId.replace(".", File.separator)))
+ val groupDir = getBaseGroupDirectory(artifact, useIvyLayout)
+ FileUtils.deleteDirectory(new File(repo, groupDir + File.separator + artifact.artifactId))
+ deps.foreach { _.foreach { dep =>
+ FileUtils.deleteDirectory(new File(repo, getBaseGroupDirectory(dep, useIvyLayout)))
}
}
} else {
FileUtils.deleteDirectory(repo)
}
+ purgeLocalIvyCache(artifact, deps, ivySettings)
+ }
+ }
+
+ /** Deletes the test packages from the ivy cache */
+ private def purgeLocalIvyCache(
+ artifact: MavenCoordinate,
+ dependencies: Option[Seq[MavenCoordinate]],
+ ivySettings: IvySettings): Unit = {
+ // delete the artifact from the cache as well if it already exists
+ FileUtils.deleteDirectory(new File(ivySettings.getDefaultCache, artifact.groupId))
+ dependencies.foreach { _.foreach { dep =>
+ FileUtils.deleteDirectory(new File(ivySettings.getDefaultCache, dep.groupId))
+ }
}
}
}
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
index 3a8da9fb9e..12c40f0b7d 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
@@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfterAll
import org.apache.ivy.core.module.descriptor.MDArtifact
import org.apache.ivy.core.settings.IvySettings
-import org.apache.ivy.plugins.resolver.IBiblioResolver
+import org.apache.ivy.plugins.resolver.{AbstractResolver, FileSystemResolver, IBiblioResolver}
import org.apache.spark.SparkFunSuite
import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
@@ -68,7 +68,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
// should have central and spark-packages by default
assert(res1.getResolvers.size() === 4)
assert(res1.getResolvers.get(0).asInstanceOf[IBiblioResolver].getName === "local-m2-cache")
- assert(res1.getResolvers.get(1).asInstanceOf[IBiblioResolver].getName === "local-ivy-cache")
+ assert(res1.getResolvers.get(1).asInstanceOf[FileSystemResolver].getName === "local-ivy-cache")
assert(res1.getResolvers.get(2).asInstanceOf[IBiblioResolver].getName === "central")
assert(res1.getResolvers.get(3).asInstanceOf[IBiblioResolver].getName === "spark-packages")
@@ -76,10 +76,10 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
val resolver2 = SparkSubmitUtils.createRepoResolvers(Option(repos), settings)
assert(resolver2.getResolvers.size() === 7)
val expected = repos.split(",").map(r => s"$r/")
- resolver2.getResolvers.toArray.zipWithIndex.foreach { case (resolver: IBiblioResolver, i) =>
+ resolver2.getResolvers.toArray.zipWithIndex.foreach { case (resolver: AbstractResolver, i) =>
if (i > 3) {
assert(resolver.getName === s"repo-${i - 3}")
- assert(resolver.getRoot === expected(i - 4))
+ assert(resolver.asInstanceOf[IBiblioResolver].getRoot === expected(i - 4))
}
}
}
@@ -112,28 +112,34 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
}
test("search for artifact at local repositories") {
- val main = new MavenCoordinate("my.awesome.lib", "mylib", "0.1")
+ val main = new MavenCoordinate("my.great.lib", "mylib", "0.1")
+ val dep = "my.great.dep:mydep:0.5"
// Local M2 repository
- IvyTestUtils.withRepository(main, None, Some(SparkSubmitUtils.m2Path)) { repo =>
+ IvyTestUtils.withRepository(main, Some(dep), Some(SparkSubmitUtils.m2Path)) { repo =>
val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, None,
isTest = true)
assert(jarPath.indexOf("mylib") >= 0, "should find artifact")
+ assert(jarPath.indexOf("mydep") >= 0, "should find dependency")
}
// Local Ivy Repository
val settings = new IvySettings
val ivyLocal = new File(settings.getDefaultIvyUserDir, "local" + File.separator)
- IvyTestUtils.withRepository(main, None, Some(ivyLocal), true) { repo =>
+ IvyTestUtils.withRepository(main, Some(dep), Some(ivyLocal), useIvyLayout = true) { repo =>
val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, None,
isTest = true)
assert(jarPath.indexOf("mylib") >= 0, "should find artifact")
+ assert(jarPath.indexOf("mydep") >= 0, "should find dependency")
}
// Local ivy repository with modified home
val dummyIvyLocal = new File(tempIvyPath, "local" + File.separator)
- IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo =>
+ settings.setDefaultIvyUserDir(new File(tempIvyPath))
+ IvyTestUtils.withRepository(main, Some(dep), Some(dummyIvyLocal), useIvyLayout = true,
+ ivySettings = settings) { repo =>
val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None,
Some(tempIvyPath), isTest = true)
assert(jarPath.indexOf("mylib") >= 0, "should find artifact")
assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path")
+ assert(jarPath.indexOf("mydep") >= 0, "should find dependency")
}
}