author     Marcelo Vanzin <vanzin@cloudera.com>    2015-08-28 12:33:40 -0700
committer  Marcelo Vanzin <vanzin@cloudera.com>    2015-08-28 12:33:40 -0700
commit     c53c902fa9c458200245f919067b41dde9cd9418
tree       c81fe06b5b0a110b308fbdfadbe63687369f6610 /yarn/src
parent     d3f87dc39480f075170817bbd00142967a938078
[SPARK-9284] [TESTS] Allow all tests to run without an assembly.
This change aims at speeding up the dev cycle a bit by making sure that
all tests behave the same w.r.t. where the code to be tested is loaded
from. Namely, tests no longer rely on the assembly; instead, they load
all needed classes from the build directories.
The main change is to make sure all build directories (classes and test-classes)
are added to the classpath of child processes when running tests.
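As a rough illustration of the idea (a minimal Scala sketch under assumed module names and build layout, not the launcher's actual code), building such a classpath amounts to collecting each module's classes and test-classes output directories:

    import java.io.File

    // Hypothetical helper: join every module's compiled output directories
    // into one classpath string for child JVMs, so tests do not need the
    // assembly jar. Module names and the scala-2.10 layout are assumptions.
    object BuildDirClasspath {
      def forModules(sparkHome: String, modules: Seq[String]): String = {
        val dirs = for {
          module <- modules
          kind <- Seq("classes", "test-classes")
          dir = new File(s"$sparkHome/$module/target/scala-2.10/$kind")
          if dir.isDirectory
        } yield dir.getAbsolutePath
        dirs.mkString(File.pathSeparator)
      }
    }

    // e.g. BuildDirClasspath.forModules(sys.props("spark.test.home"), Seq("core", "yarn"))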
YarnClusterSuite required some custom code, since its executors are
launched differently (i.e. not through the launcher library, as the
standalone and Mesos backends are).
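Concretely (a simplified, self-contained sketch; the two configuration keys are real Spark settings, but the classpath value here is a placeholder and the wiring is condensed from the test code below): because the NodeManager, not the launcher library, spawns the driver and executor JVMs on YARN, the build-directory classpath has to travel through configuration.

    // Placeholder path standing in for the computed test classpath.
    val testClasspath = "/path/to/core/target/scala-2.10/classes"
    val props = new java.util.Properties()
    // Hand the classpath to the YARN-launched JVMs via Spark conf.
    props.setProperty("spark.driver.extraClassPath", testClasspath)
    props.setProperty("spark.executor.extraClassPath", testClasspath)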
I also found a couple of tests that could leak a SparkContext on failure,
and added code to handle those.
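The leak fix follows the standard guard pattern (a minimal sketch, not the exact code from this patch): make sure the context is stopped even when an assertion throws.

    import org.apache.spark.{SparkConf, SparkContext}

    // Sketch: stop the SparkContext in a finally block so a failing
    // assertion cannot leak a live context into subsequent suites.
    val sc = new SparkContext(
      new SparkConf().setMaster("local").setAppName("leak-guard-demo"))
    try {
      assert(sc.parallelize(1 to 10).count() == 10)
    } finally {
      sc.stop()
    }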
With this patch, it's possible to run the following command from a clean
source directory and have all tests pass:
mvn -Pyarn -Phadoop-2.4 -Phive-thriftserver install
Author: Marcelo Vanzin <vanzin@cloudera.com>
Closes #7629 from vanzin/SPARK-9284.
Diffstat (limited to 'yarn/src')
4 files changed, 79 insertions, 16 deletions
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
index b4f8049bff..17c59ff06e 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
@@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.server.MiniYARNCluster
 import org.scalatest.{BeforeAndAfterAll, Matchers}
 
 import org.apache.spark._
+import org.apache.spark.launcher.TestClasspathBuilder
 import org.apache.spark.util.Utils
 
 abstract class BaseYarnClusterSuite
@@ -43,6 +44,9 @@ abstract class BaseYarnClusterSuite
     |log4j.appender.console.target=System.err
     |log4j.appender.console.layout=org.apache.log4j.PatternLayout
     |log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+    |log4j.logger.org.apache.hadoop=WARN
+    |log4j.logger.org.eclipse.jetty=WARN
+    |log4j.logger.org.spark-project.jetty=WARN
     """.stripMargin
 
   private var yarnCluster: MiniYARNCluster = _
@@ -51,8 +55,7 @@ abstract class BaseYarnClusterSuite
   private var hadoopConfDir: File = _
   private var logConfDir: File = _
 
-
-  def yarnConfig: YarnConfiguration
+  def newYarnConfig(): YarnConfiguration
 
   override def beforeAll() {
     super.beforeAll()
@@ -65,8 +68,14 @@ abstract class BaseYarnClusterSuite
     val logConfFile = new File(logConfDir, "log4j.properties")
     Files.write(LOG4J_CONF, logConfFile, UTF_8)
 
+    // Disable the disk utilization check to avoid the test hanging when people's disks are
+    // getting full.
+    val yarnConf = newYarnConfig()
+    yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage",
+      "100.0")
+
     yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1)
-    yarnCluster.init(yarnConfig)
+    yarnCluster.init(yarnConf)
     yarnCluster.start()
 
     // There's a race in MiniYARNCluster in which start() may return before the RM has updated
@@ -114,19 +123,23 @@ abstract class BaseYarnClusterSuite
       sparkArgs: Seq[String] = Nil,
       extraClassPath: Seq[String] = Nil,
       extraJars: Seq[String] = Nil,
-      extraConf: Map[String, String] = Map()): Unit = {
+      extraConf: Map[String, String] = Map(),
+      extraEnv: Map[String, String] = Map()): Unit = {
     val master = if (clientMode) "yarn-client" else "yarn-cluster"
     val props = new Properties()
 
     props.setProperty("spark.yarn.jar", "local:" + fakeSparkJar.getAbsolutePath())
 
-    val childClasspath = logConfDir.getAbsolutePath() +
-      File.pathSeparator +
-      sys.props("java.class.path") +
-      File.pathSeparator +
-      extraClassPath.mkString(File.pathSeparator)
-    props.setProperty("spark.driver.extraClassPath", childClasspath)
-    props.setProperty("spark.executor.extraClassPath", childClasspath)
+    val testClasspath = new TestClasspathBuilder()
+      .buildClassPath(
+        logConfDir.getAbsolutePath() +
+          File.pathSeparator +
+          extraClassPath.mkString(File.pathSeparator))
+      .asScala
+      .mkString(File.pathSeparator)
+
+    props.setProperty("spark.driver.extraClassPath", testClasspath)
+    props.setProperty("spark.executor.extraClassPath", testClasspath)
 
     // SPARK-4267: make sure java options are propagated correctly.
     props.setProperty("spark.driver.extraJavaOptions", "-Dfoo=\"one two three\"")
@@ -168,7 +181,7 @@ abstract class BaseYarnClusterSuite
       appArgs
 
     Utils.executeAndGetOutput(argv,
-      extraEnvironment = Map("YARN_CONF_DIR" -> hadoopConfDir.getAbsolutePath()))
+      extraEnvironment = Map("YARN_CONF_DIR" -> hadoopConfDir.getAbsolutePath()) ++ extraEnv)
   }
 
   /**
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 5a4ea2ea2f..b5a42fd6af 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -28,7 +28,9 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.scalatest.Matchers
 
 import org.apache.spark._
-import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationStart, SparkListenerExecutorAdded}
+import org.apache.spark.launcher.TestClasspathBuilder
+import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationStart,
+  SparkListenerExecutorAdded}
 import org.apache.spark.scheduler.cluster.ExecutorInfo
 import org.apache.spark.util.Utils
 
@@ -39,7 +41,7 @@ import org.apache.spark.util.Utils
  */
 class YarnClusterSuite extends BaseYarnClusterSuite {
 
-  override def yarnConfig: YarnConfiguration = new YarnConfiguration()
+  override def newYarnConfig(): YarnConfiguration = new YarnConfiguration()
 
   private val TEST_PYFILE = """
     |import mod1, mod2
@@ -111,6 +113,17 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     val primaryPyFile = new File(tempDir, "test.py")
     Files.write(TEST_PYFILE, primaryPyFile, UTF_8)
 
+    // When running tests, let's not assume the user has built the assembly module, which also
+    // creates the pyspark archive. Instead, let's use PYSPARK_ARCHIVES_PATH to point at the
+    // needed locations.
+    val sparkHome = sys.props("spark.test.home");
+    val pythonPath = Seq(
+      s"$sparkHome/python/lib/py4j-0.8.2.1-src.zip",
+      s"$sparkHome/python")
+    val extraEnv = Map(
+      "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
+      "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator))
+
     val moduleDir =
       if (clientMode) {
         // In client-mode, .py files added with --py-files are not visible in the driver.
@@ -130,7 +143,8 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
 
     runSpark(clientMode, primaryPyFile.getAbsolutePath(),
       sparkArgs = Seq("--py-files", pyFiles),
-      appArgs = Seq(result.getAbsolutePath()))
+      appArgs = Seq(result.getAbsolutePath()),
+      extraEnv = extraEnv)
 
     checkResult(result)
   }
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
index 5e8238822b..8d9c9b3004 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
@@ -34,7 +34,7 @@ import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
  */
 class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {
 
-  override def yarnConfig: YarnConfiguration = {
+  override def newYarnConfig(): YarnConfiguration = {
     val yarnConfig = new YarnConfiguration()
     yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
     yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
diff --git a/yarn/src/test/scala/org/apache/spark/launcher/TestClasspathBuilder.scala b/yarn/src/test/scala/org/apache/spark/launcher/TestClasspathBuilder.scala
new file mode 100644
index 0000000000..da9e8e21a2
--- /dev/null
+++ b/yarn/src/test/scala/org/apache/spark/launcher/TestClasspathBuilder.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.launcher
+
+import java.util.{List => JList, Map => JMap}
+
+/**
+ * Exposes AbstractCommandBuilder to the YARN tests, so that they can build classpaths the same
+ * way other cluster managers do.
+ */
+private[spark] class TestClasspathBuilder extends AbstractCommandBuilder {
+
+  childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, sys.props("spark.test.home"))
+
+  override def buildClassPath(extraCp: String): JList[String] = super.buildClassPath(extraCp)
+
+  /** Not used by the YARN tests. */
+  override def buildCommand(env: JMap[String, String]): JList[String] =
+    throw new UnsupportedOperationException()
+
+}