author    Marcelo Vanzin <vanzin@cloudera.com>    2015-08-28 12:33:40 -0700
committer Marcelo Vanzin <vanzin@cloudera.com>    2015-08-28 12:33:40 -0700
commit    c53c902fa9c458200245f919067b41dde9cd9418 (patch)
tree      c81fe06b5b0a110b308fbdfadbe63687369f6610 /yarn
parent    d3f87dc39480f075170817bbd00142967a938078 (diff)
[SPARK-9284] [TESTS] Allow all tests to run without an assembly.
This change aims to speed up the dev cycle a bit by making sure that all tests behave the same w.r.t. where the code to be tested is loaded from. Namely, tests no longer rely on the assembly; instead, they load all needed classes from the build directories.

The main change is to make sure all build directories (classes and test-classes) are added to the classpath of child processes when running tests. YarnClusterSuite required some custom code since the executors are run differently (i.e. not through the launcher library, as standalone and Mesos do). I also found a couple of tests that could leak a SparkContext on failure and added code to handle those.

With this patch, it's possible to run the following command from a clean source directory and have all tests pass:

    mvn -Pyarn -Phadoop-2.4 -Phive-thriftserver install

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #7629 from vanzin/SPARK-9284.
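As context for the SparkContext leak fix mentioned above, a minimal sketch of the usual guard pattern such a test can use; the suite and test names below are illustrative assumptions, not code from this patch:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.scalatest.FunSuite

    // Hypothetical suite, for illustration only.
    class LeakGuardExampleSuite extends FunSuite {
      test("stop the context even when the test body fails") {
        val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("leak-guard"))
        try {
          // A failing assertion here would otherwise leak `sc` into later suites.
          assert(sc.parallelize(1 to 10).count() == 10)
        } finally {
          sc.stop()
        }
      }
    }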
Diffstat (limited to 'yarn')
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala         | 37
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala             | 20
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala  |  2
-rw-r--r--  yarn/src/test/scala/org/apache/spark/launcher/TestClasspathBuilder.scala            | 36
4 files changed, 79 insertions, 16 deletions
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
index b4f8049bff..17c59ff06e 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
@@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.server.MiniYARNCluster
import org.scalatest.{BeforeAndAfterAll, Matchers}
import org.apache.spark._
+import org.apache.spark.launcher.TestClasspathBuilder
import org.apache.spark.util.Utils
abstract class BaseYarnClusterSuite
@@ -43,6 +44,9 @@ abstract class BaseYarnClusterSuite
|log4j.appender.console.target=System.err
|log4j.appender.console.layout=org.apache.log4j.PatternLayout
|log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+ |log4j.logger.org.apache.hadoop=WARN
+ |log4j.logger.org.eclipse.jetty=WARN
+ |log4j.logger.org.spark-project.jetty=WARN
""".stripMargin
private var yarnCluster: MiniYARNCluster = _
@@ -51,8 +55,7 @@ abstract class BaseYarnClusterSuite
private var hadoopConfDir: File = _
private var logConfDir: File = _
-
- def yarnConfig: YarnConfiguration
+ def newYarnConfig(): YarnConfiguration
override def beforeAll() {
super.beforeAll()
@@ -65,8 +68,14 @@ abstract class BaseYarnClusterSuite
val logConfFile = new File(logConfDir, "log4j.properties")
Files.write(LOG4J_CONF, logConfFile, UTF_8)
+ // Disable the disk utilization check to avoid the test hanging when people's disks are
+ // getting full.
+ val yarnConf = newYarnConfig()
+ yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage",
+ "100.0")
+
yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1)
- yarnCluster.init(yarnConfig)
+ yarnCluster.init(yarnConf)
yarnCluster.start()
// There's a race in MiniYARNCluster in which start() may return before the RM has updated
@@ -114,19 +123,23 @@ abstract class BaseYarnClusterSuite
sparkArgs: Seq[String] = Nil,
extraClassPath: Seq[String] = Nil,
extraJars: Seq[String] = Nil,
- extraConf: Map[String, String] = Map()): Unit = {
+ extraConf: Map[String, String] = Map(),
+ extraEnv: Map[String, String] = Map()): Unit = {
val master = if (clientMode) "yarn-client" else "yarn-cluster"
val props = new Properties()
props.setProperty("spark.yarn.jar", "local:" + fakeSparkJar.getAbsolutePath())
- val childClasspath = logConfDir.getAbsolutePath() +
- File.pathSeparator +
- sys.props("java.class.path") +
- File.pathSeparator +
- extraClassPath.mkString(File.pathSeparator)
- props.setProperty("spark.driver.extraClassPath", childClasspath)
- props.setProperty("spark.executor.extraClassPath", childClasspath)
+ val testClasspath = new TestClasspathBuilder()
+ .buildClassPath(
+ logConfDir.getAbsolutePath() +
+ File.pathSeparator +
+ extraClassPath.mkString(File.pathSeparator))
+ .asScala
+ .mkString(File.pathSeparator)
+
+ props.setProperty("spark.driver.extraClassPath", testClasspath)
+ props.setProperty("spark.executor.extraClassPath", testClasspath)
// SPARK-4267: make sure java options are propagated correctly.
props.setProperty("spark.driver.extraJavaOptions", "-Dfoo=\"one two three\"")
@@ -168,7 +181,7 @@ abstract class BaseYarnClusterSuite
appArgs
Utils.executeAndGetOutput(argv,
- extraEnvironment = Map("YARN_CONF_DIR" -> hadoopConfDir.getAbsolutePath()))
+ extraEnvironment = Map("YARN_CONF_DIR" -> hadoopConfDir.getAbsolutePath()) ++ extraEnv)
}
/**
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 5a4ea2ea2f..b5a42fd6af 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -28,7 +28,9 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers
import org.apache.spark._
-import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationStart, SparkListenerExecutorAdded}
+import org.apache.spark.launcher.TestClasspathBuilder
+import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationStart,
+ SparkListenerExecutorAdded}
import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.util.Utils
@@ -39,7 +41,7 @@ import org.apache.spark.util.Utils
*/
class YarnClusterSuite extends BaseYarnClusterSuite {
- override def yarnConfig: YarnConfiguration = new YarnConfiguration()
+ override def newYarnConfig(): YarnConfiguration = new YarnConfiguration()
private val TEST_PYFILE = """
|import mod1, mod2
@@ -111,6 +113,17 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
val primaryPyFile = new File(tempDir, "test.py")
Files.write(TEST_PYFILE, primaryPyFile, UTF_8)
+ // When running tests, let's not assume the user has built the assembly module, which also
+ // creates the pyspark archive. Instead, let's use PYSPARK_ARCHIVES_PATH to point at the
+ // needed locations.
+ val sparkHome = sys.props("spark.test.home");
+ val pythonPath = Seq(
+ s"$sparkHome/python/lib/py4j-0.8.2.1-src.zip",
+ s"$sparkHome/python")
+ val extraEnv = Map(
+ "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
+ "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator))
+
val moduleDir =
if (clientMode) {
// In client-mode, .py files added with --py-files are not visible in the driver.
@@ -130,7 +143,8 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
runSpark(clientMode, primaryPyFile.getAbsolutePath(),
sparkArgs = Seq("--py-files", pyFiles),
- appArgs = Seq(result.getAbsolutePath()))
+ appArgs = Seq(result.getAbsolutePath()),
+ extraEnv = extraEnv)
checkResult(result)
}
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
index 5e8238822b..8d9c9b3004 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
@@ -34,7 +34,7 @@ import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
*/
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {
- override def yarnConfig: YarnConfiguration = {
+ override def newYarnConfig(): YarnConfiguration = {
val yarnConfig = new YarnConfiguration()
yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
diff --git a/yarn/src/test/scala/org/apache/spark/launcher/TestClasspathBuilder.scala b/yarn/src/test/scala/org/apache/spark/launcher/TestClasspathBuilder.scala
new file mode 100644
index 0000000000..da9e8e21a2
--- /dev/null
+++ b/yarn/src/test/scala/org/apache/spark/launcher/TestClasspathBuilder.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.launcher
+
+import java.util.{List => JList, Map => JMap}
+
+/**
+ * Exposes AbstractCommandBuilder to the YARN tests, so that they can build classpaths the same
+ * way other cluster managers do.
+ */
+private[spark] class TestClasspathBuilder extends AbstractCommandBuilder {
+
+ childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, sys.props("spark.test.home"))
+
+ override def buildClassPath(extraCp: String): JList[String] = super.buildClassPath(extraCp)
+
+ /** Not used by the YARN tests. */
+ override def buildCommand(env: JMap[String, String]): JList[String] =
+ throw new UnsupportedOperationException()
+
+}
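For reference, a minimal sketch of how the suites above use this builder to assemble the child classpath, mirroring the BaseYarnClusterSuite change; the extraClassPath value here is an illustrative assumption:

    import java.io.File
    import scala.collection.JavaConverters._

    import org.apache.spark.launcher.TestClasspathBuilder

    // As in BaseYarnClusterSuite, this runs inside the org.apache.spark namespace,
    // since TestClasspathBuilder is private[spark].
    // Build the test classpath from the build directories plus any extra entries,
    // then join it into one string for spark.{driver,executor}.extraClassPath.
    val extraClassPath = Seq("/tmp/extra-test.jar")  // illustrative entry
    val testClasspath = new TestClasspathBuilder()
      .buildClassPath(extraClassPath.mkString(File.pathSeparator))
      .asScala
      .mkString(File.pathSeparator)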