author    Marcelo Vanzin <vanzin@cloudera.com>    2015-06-08 15:37:28 +0100
committer Sean Owen <sowen@cloudera.com>          2015-06-08 15:37:28 +0100
commit    a1d9e5cc60d317ecf8fe390b66b623ae39c4534d (patch)
tree      1801288569cadc5f429fd33baf058b2504e11792
parent    03ef6be9ce61a13dcd9d8c71298fb4be39119411 (diff)
[SPARK-8126] [BUILD] Use custom temp directory during build.
Even with all the efforts to clean up the temp directories created by unit tests, Spark leaves a lot of garbage in /tmp after a test run. This change overrides java.io.tmpdir to place those files under the build directory instead.

After a full sbt unit test run, I was left with > 400 MB of temp files. Since they're now under the build directory, they're much easier to clean up.

This change also slightly modifies a unit test so that it no longer pollutes the source directory with test data.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #6674 from vanzin/SPARK-8126 and squashes the following commits:

0f8ad41 [Marcelo Vanzin] Make sure tmp dir exists when tests run.
643e916 [Marcelo Vanzin] [MINOR] [BUILD] Use custom temp directory during build.
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala | 22
-rw-r--r--  pom.xml                                                                  | 24
-rw-r--r--  project/SparkBuild.scala                                                 |  6
3 files changed, 41 insertions(+), 11 deletions(-)
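For context, here is a minimal sketch (not part of this patch; object and file names are illustrative) of how java.io.tmpdir steers JVM temp-file creation. The build changes below simply point the property at the build directory's tmp/ subdirectory before forking test JVMs:

    import java.io.File

    object TmpDirDemo {
      def main(args: Array[String]): Unit = {
        // Launch with -Djava.io.tmpdir=target/tmp (what the build now does)
        // and the temp file lands under the build directory instead of /tmp.
        println(s"java.io.tmpdir = ${sys.props("java.io.tmpdir")}")
        val f = File.createTempFile("demo", ".tmp")
        println(s"created ${f.getAbsolutePath}")
        f.deleteOnExit()
      }
    }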
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
index 8fda5c8b47..07d261cc42 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
@@ -28,9 +28,12 @@ import org.apache.ivy.plugins.resolver.IBiblioResolver
import org.apache.spark.SparkFunSuite
import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
+import org.apache.spark.util.Utils
class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
+ private var tempIvyPath: String = _
+
private val noOpOutputStream = new OutputStream {
def write(b: Int) = {}
}
@@ -47,6 +50,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
super.beforeAll()
// We don't want to write logs during testing
SparkSubmitUtils.printStream = new BufferPrintStream
+ tempIvyPath = Utils.createTempDir(namePrefix = "ivy").getAbsolutePath()
}
test("incorrect maven coordinate throws error") {
@@ -90,21 +94,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
}
test("ivy path works correctly") {
- val ivyPath = "dummy" + File.separator + "ivy"
val md = SparkSubmitUtils.getModuleDescriptor
val artifacts = for (i <- 0 until 3) yield new MDArtifact(md, s"jar-$i", "jar", "jar")
- var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath))
+ var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(tempIvyPath))
for (i <- 0 until 3) {
- val index = jPaths.indexOf(ivyPath)
+ val index = jPaths.indexOf(tempIvyPath)
assert(index >= 0)
- jPaths = jPaths.substring(index + ivyPath.length)
+ jPaths = jPaths.substring(index + tempIvyPath.length)
}
val main = MavenCoordinate("my.awesome.lib", "mylib", "0.1")
IvyTestUtils.withRepository(main, None, None) { repo =>
// end to end
val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo),
- Option(ivyPath), true)
- assert(jarPath.indexOf(ivyPath) >= 0, "should use non-default ivy path")
+ Option(tempIvyPath), true)
+ assert(jarPath.indexOf(tempIvyPath) >= 0, "should use non-default ivy path")
}
}
@@ -123,13 +126,12 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
assert(jarPath.indexOf("mylib") >= 0, "should find artifact")
}
// Local ivy repository with modified home
- val dummyIvyPath = "dummy" + File.separator + "ivy"
- val dummyIvyLocal = new File(dummyIvyPath, "local" + File.separator)
+ val dummyIvyLocal = new File(tempIvyPath, "local" + File.separator)
IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo =>
val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None,
- Some(dummyIvyPath), true)
+ Some(tempIvyPath), true)
assert(jarPath.indexOf("mylib") >= 0, "should find artifact")
- assert(jarPath.indexOf(dummyIvyPath) >= 0, "should be in new ivy path")
+ assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path")
}
}
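The test above now obtains its ivy path from Utils.createTempDir instead of hardcoding "dummy/ivy" inside the source tree. As a rough, hypothetical approximation of what such a helper provides (Spark's real Utils.createTempDir additionally registers the directory for recursive deletion on JVM shutdown, which this sketch does not):

    import java.io.File
    import java.nio.file.Files

    object TempDirSketch {
      // Illustrative stand-in, not Spark's actual implementation.
      def createTempDir(namePrefix: String = "spark"): File = {
        // Created under java.io.tmpdir, which the build now points
        // at the build directory's tmp/ subdirectory.
        val dir = Files.createTempDirectory(namePrefix).toFile
        dir.deleteOnExit() // only removes the dir if it is empty at exit
        dir
      }
    }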
diff --git a/pom.xml b/pom.xml
index 67b6375f57..5a5d183e3d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -179,7 +179,7 @@
<parquet.deps.scope>compile</parquet.deps.scope>
<!--
- Overridable test home. So that you can call individual pom files directory without
+ Overridable test home. So that you can call individual pom files directly without
things breaking.
-->
<spark.test.home>${session.executionRootDirectory}</spark.test.home>
@@ -1256,6 +1256,7 @@
<systemProperties>
<derby.system.durability>test</derby.system.durability>
<java.awt.headless>true</java.awt.headless>
+ <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
<spark.test.home>${spark.test.home}</spark.test.home>
<spark.testing>1</spark.testing>
<spark.ui.enabled>false</spark.ui.enabled>
@@ -1289,6 +1290,7 @@
<systemProperties>
<derby.system.durability>test</derby.system.durability>
<java.awt.headless>true</java.awt.headless>
+ <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
<spark.test.home>${spark.test.home}</spark.test.home>
<spark.testing>1</spark.testing>
<spark.ui.enabled>false</spark.ui.enabled>
@@ -1548,6 +1550,26 @@
</execution>
</executions>
</plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>create-tmp-dir</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ <configuration>
+ <target>
+ <mkdir dir="${project.build.directory}/tmp" />
+ </target>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
<!-- Enable surefire and scalatest in all children, in one place: -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
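The create-tmp-dir execution above matters because a stock JVM does not create java.io.tmpdir on demand: if the directory is missing, temp-file creation simply fails. (This is also why commit 0f8ad41 makes SparkBuild.scala create the directory eagerly.) A small sketch of that failure mode, assuming standard JDK behavior:

    import java.io.File

    object MissingTmpDirProbe {
      def main(args: Array[String]): Unit = {
        // Point java.io.tmpdir at a directory that does not exist.
        sys.props("java.io.tmpdir") = "target/does-not-exist"
        try {
          File.createTempFile("probe", ".tmp")
        } catch {
          // Expected: an IOException such as "No such file or directory".
          case e: java.io.IOException =>
            println(s"temp file creation failed: ${e.getMessage}")
        }
      }
    }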
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index ef3a175bac..d7e374558c 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -51,6 +51,11 @@ object BuildCommons {
// Root project.
val spark = ProjectRef(buildLocation, "spark")
val sparkHome = buildLocation
+
+ val testTempDir = s"$sparkHome/target/tmp"
+ if (!new File(testTempDir).isDirectory()) {
+ require(new File(testTempDir).mkdirs())
+ }
}
object SparkBuild extends PomBuild {
@@ -496,6 +501,7 @@ object TestSettings {
"SPARK_DIST_CLASSPATH" ->
(fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"),
"JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))),
+ javaOptions in Test += s"-Djava.io.tmpdir=$testTempDir",
javaOptions in Test += "-Dspark.test.home=" + sparkHome,
javaOptions in Test += "-Dspark.testing=1",
javaOptions in Test += "-Dspark.port.maxRetries=100",
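An illustrative check (not part of the patch) that could be dropped into any suite to confirm the forked test JVM picked up the override:

    val tmpDir = new java.io.File(sys.props("java.io.tmpdir"))
    // With this patch applied, tmpDir should resolve to target/tmp under the
    // Spark home and already exist, since the build creates it before tests run.
    assert(tmpDir.isDirectory, s"expected pre-created temp dir at $tmpDir")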