aboutsummaryrefslogtreecommitdiff
path: root/core/src/main
diff options
context:
space:
mode:
authorSun Rui <sunrui2016@gmail.com>2016-08-09 09:39:45 +0100
committerSean Owen <sowen@cloudera.com>2016-08-09 09:39:45 +0100
commitaf710e5bdda9da04dbba615e219e7e496ca82acc (patch)
tree937445876c6f31ab8aaa09e5d36726450a5e10e6 /core/src/main
parent801e4d097f45b269a9c6b25723d925f3e24ba498 (diff)
downloadspark-af710e5bdda9da04dbba615e219e7e496ca82acc.tar.gz
spark-af710e5bdda9da04dbba615e219e7e496ca82acc.tar.bz2
spark-af710e5bdda9da04dbba615e219e7e496ca82acc.zip
[SPARK-16522][MESOS] Spark application throws exception on exit.
## What changes were proposed in this pull request? Spark applications running on Mesos throw exception upon exit. For details, refer to https://issues.apache.org/jira/browse/SPARK-16522. I am not sure if there is any better fix, so wait for review comments. ## How was this patch tested? Manual test. Observed that the exception is gone upon application exit. Author: Sun Rui <sunrui2016@gmail.com> Closes #14175 from sun-rui/SPARK-16522.
Diffstat (limited to 'core/src/main')
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala7
1 files changed, 6 insertions, 1 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
index 263e6197a6..5177557132 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
@@ -553,7 +553,12 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
taskId: String,
reason: String): Unit = {
stateLock.synchronized {
- removeExecutor(taskId, SlaveLost(reason))
+ // Do not call removeExecutor() after this scheduler backend was stopped because
+ // removeExecutor() internally will send a message to the driver endpoint but
+ // the driver endpoint is not available now, otherwise an exception will be thrown.
+ if (!stopCalled) {
+ removeExecutor(taskId, SlaveLost(reason))
+ }
slaves(slaveId).taskIDs.remove(taskId)
}
}