aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormeiyoula <1039320815@qq.com>2015-12-04 16:50:40 -0800
committerMarcelo Vanzin <vanzin@cloudera.com>2015-12-04 16:50:43 -0800
commitbbfc16ec9d690c2dfa20896bd6d33f9783b9c109 (patch)
treee1da668a41c2ce9319bf78cc751ef4fc54feba92
parentb7204e1d41271d2e8443484371770936664350b1 (diff)
downloadspark-bbfc16ec9d690c2dfa20896bd6d33f9783b9c109.tar.gz
spark-bbfc16ec9d690c2dfa20896bd6d33f9783b9c109.tar.bz2
spark-bbfc16ec9d690c2dfa20896bd6d33f9783b9c109.zip
[SPARK-12142][CORE]Reply false when container allocator is not ready and reset target
Using Dynamic Allocation function, when a new AM is starting, and ExecutorAllocationManager send RequestExecutor message to AM. If the container allocator is not ready, the whole app will hang on Author: meiyoula <1039320815@qq.com> Closes #10138 from XuTingjun/patch-1.
-rw-r--r--core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala1
-rw-r--r--yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala3
2 files changed, 3 insertions, 1 deletions
diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
index 6419218f47..34c32ce312 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
@@ -370,6 +370,7 @@ private[spark] class ExecutorAllocationManager(
} else {
logWarning(
s"Unable to reach the cluster manager to request $numExecutorsTarget total executors!")
+ numExecutorsTarget = oldNumExecutorsTarget
0
}
}
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 13ef4dfd64..1970f7d150 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -600,11 +600,12 @@ private[spark] class ApplicationMaster(
localityAwareTasks, hostToLocalTaskCount)) {
resetAllocatorInterval()
}
+ context.reply(true)
case None =>
logWarning("Container allocator is not ready to request executors yet.")
+ context.reply(false)
}
- context.reply(true)
case KillExecutors(executorIds) =>
logInfo(s"Driver requested to kill executor(s) ${executorIds.mkString(", ")}.")