aboutsummaryrefslogtreecommitdiff
path: root/yarn/stable
diff options
context:
space:
mode:
authorSandy Ryza <sandy@cloudera.com>2014-02-28 09:40:47 -0600
committerThomas Graves <tgraves@apache.org>2014-02-28 09:40:47 -0600
commit5f419bf9f433e8f057237f1d5bfed9f5f4e9427c (patch)
tree796cda45fed79d96e6364a42880f4abd1f08e430 /yarn/stable
parentedf8a56ab7eaee1f7c3b4579eb10464984d31d7a (diff)
downloadspark-5f419bf9f433e8f057237f1d5bfed9f5f4e9427c.tar.gz
spark-5f419bf9f433e8f057237f1d5bfed9f5f4e9427c.tar.bz2
spark-5f419bf9f433e8f057237f1d5bfed9f5f4e9427c.zip
SPARK-1032. If Yarn app fails before registering, app master stays aroun...
...d long after This reopens https://github.com/apache/incubator-spark/pull/648 against the new repo. Author: Sandy Ryza <sandy@cloudera.com> Closes #28 from sryza/sandy-spark-1032 and squashes the following commits: 5953f50 [Sandy Ryza] SPARK-1032. If Yarn app fails before registering, app master stays around long after
Diffstat (limited to 'yarn/stable')
-rw-r--r--yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala22
1 files changed, 16 insertions, 6 deletions
diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 4b777d5fa7..0f58c49c69 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -67,6 +67,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
private val maxNumWorkerFailures = sparkConf.getInt("spark.yarn.max.worker.failures",
math.max(args.numWorkers * 2, 3))
+ private var registered = false
+
def run() {
// Setup the directories so things go to YARN approved directories rather
// than user specified and /tmp.
@@ -99,7 +101,12 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
waitForSparkContextInitialized()
// Do this after Spark master is up and SparkContext is created so that we can register UI Url.
- val appMasterResponse: RegisterApplicationMasterResponse = registerApplicationMaster()
+ synchronized {
+ if (!isFinished) {
+ registerApplicationMaster()
+ registered = true
+ }
+ }
// Allocate all containers
allocateWorkers()
@@ -180,7 +187,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
var numTries = 0
val waitTime = 10000L
val maxNumTries = sparkConf.getInt("spark.yarn.applicationMaster.waitTries", 10)
- while (ApplicationMaster.sparkContextRef.get() == null && numTries < maxNumTries) {
+ while (ApplicationMaster.sparkContextRef.get() == null && numTries < maxNumTries
+ && !isFinished) {
logInfo("Waiting for Spark context initialization ... " + numTries)
numTries = numTries + 1
ApplicationMaster.sparkContextRef.wait(waitTime)
@@ -313,11 +321,13 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
return
}
isFinished = true
- }
- logInfo("finishApplicationMaster with " + status)
- // Set tracking URL to empty since we don't have a history server.
- amClient.unregisterApplicationMaster(status, "" /* appMessage */ , "" /* appTrackingUrl */)
+ logInfo("finishApplicationMaster with " + status)
+ if (registered) {
+ // Set tracking URL to empty since we don't have a history server.
+ amClient.unregisterApplicationMaster(status, "" /* appMessage */ , "" /* appTrackingUrl */)
+ }
+ }
}
/**