author    Marcelo Vanzin <vanzin@cloudera.com>    2016-08-17 11:12:21 -0700
committer Marcelo Vanzin <vanzin@cloudera.com>    2016-08-17 11:12:21 -0700
commit    e3fec51fa1ed161789ab7aa32ed36efe357b5d31 (patch)
tree      963eced70149018b74120f532e97484c0d69226e /yarn/src/test/scala/org
parent    928ca1c6d12b23d84f9b6205e22d2e756311f072 (diff)
[SPARK-16930][YARN] Fix a couple of races in cluster app initialization.
There are two narrow races that could cause the ApplicationMaster to miss the moment when the user application instantiates the SparkContext, which could cause app failures when nothing was wrong with the app. It was also possible for a failing application to get stuck in the wait-for-context loop for a long time, instead of failing quickly.

The change uses a promise to track the SparkContext instance, which gets rid of the races and allows for some simplification of the code.

Tested with existing unit tests, and a new one added to exercise the timeout code.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #14542 from vanzin/SPARK-16930.
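As a rough illustration of the approach the commit message describes (the names below are hypothetical, not the actual ApplicationMaster code), the handoff can be modeled as a Promise that the user-code thread completes exactly once, with a single bounded wait on the AM side instead of a polling loop:

import java.util.concurrent.TimeoutException

import scala.concurrent.{Await, Promise}
import scala.concurrent.duration._

// Stand-in for org.apache.spark.SparkContext, to keep the sketch self-contained.
class SparkContext

object AmHandoffSketch {

  // Completed at most once, no matter when the context comes up.
  private val sparkContextPromise = Promise[SparkContext]()

  // Called from the thread running the user application once the context exists.
  def sparkContextInitialized(sc: SparkContext): Unit =
    sparkContextPromise.trySuccess(sc)

  // Called by the AM: a bounded wait on the future leaves no window in which
  // an already-created context can be missed.
  def waitForSparkContext(maxWaitMs: Long): SparkContext =
    try {
      Await.result(sparkContextPromise.future, maxWaitMs.milliseconds)
    } catch {
      case e: TimeoutException =>
        throw new IllegalStateException(
          s"SparkContext not initialized within $maxWaitMs ms", e)
    }
}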
Diffstat (limited to 'yarn/src/test/scala/org')
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 22
1 file changed, 22 insertions(+), 0 deletions(-)
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 8ab7b21c22..fb7926f6a1 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -33,6 +33,7 @@ import org.scalatest.concurrent.Eventually._
import org.apache.spark._
import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.launcher._
import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationStart,
@@ -192,6 +193,14 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
}
}
+ test("timeout to get SparkContext in cluster mode triggers failure") {
+ val timeout = 2000
+ val finalState = runSpark(false, mainClassName(SparkContextTimeoutApp.getClass),
+ appArgs = Seq((timeout * 4).toString),
+ extraConf = Map(AM_MAX_WAIT_TIME.key -> timeout.toString))
+ finalState should be (SparkAppHandle.State.FAILED)
+ }
+
private def testBasicYarnApp(clientMode: Boolean, conf: Map[String, String] = Map()): Unit = {
val result = File.createTempFile("result", null, tempDir)
val finalState = runSpark(clientMode, mainClassName(YarnClusterDriver.getClass),
@@ -469,3 +478,16 @@ private object YarnLauncherTestApp {
}
}
+
+/**
+ * Used to test code in the AM that detects the SparkContext instance. Expects a single argument
+ * with the duration to sleep for, in ms.
+ */
+private object SparkContextTimeoutApp {
+
+ def main(args: Array[String]): Unit = {
+ val Array(sleepTime) = args
+ Thread.sleep(java.lang.Long.parseLong(sleepTime))
+ }
+
+}
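
To see the failure path that the new test exercises, the sketch after the commit message can be driven the same way (again, purely illustrative): never completing the promise mirrors an app such as SparkContextTimeoutApp sleeping past AM_MAX_WAIT_TIME, and the bounded wait throws instead of looping.

object AmHandoffSketchDemo {
  def main(args: Array[String]): Unit = {
    // Nothing calls AmHandoffSketch.sparkContextInitialized, mimicking a user
    // app that sleeps past the AM's wait window.
    try {
      AmHandoffSketch.waitForSparkContext(maxWaitMs = 2000)
    } catch {
      case e: IllegalStateException =>
        println(s"Application fails fast: ${e.getMessage}")
    }
  }
}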