diff options
author | Jongyoul Lee <jongyoul@gmail.com> | 2015-03-20 12:24:34 +0000 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-03-20 12:24:34 +0000 |
commit | 116c553fd6f6d2adcbbf000cd80b5c46d4516e87 (patch) | |
tree | b23f2a1fd1fde449e23506921f19ba03d18b9600 /core | |
parent | 0745a305fac622a6eeb8aa4a7401205a14252939 (diff) | |
download | spark-116c553fd6f6d2adcbbf000cd80b5c46d4516e87.tar.gz spark-116c553fd6f6d2adcbbf000cd80b5c46d4516e87.tar.bz2 spark-116c553fd6f6d2adcbbf000cd80b5c46d4516e87.zip |
[SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR
- Made TaskState.isFailed for handling TASK_LOST and TASK_ERROR and synchronizing CoarseMesosSchedulerBackend and MesosSchedulerBackend
- This is related #5000
Author: Jongyoul Lee <jongyoul@gmail.com>
Closes #5088 from jongyoul/SPARK-6286-1 and squashes the following commits:
4f2362f [Jongyoul Lee] [SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR - Fixed scalastyle
ac4336a [Jongyoul Lee] [SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR - Made TaskState.isFailed for handling TASK_LOST and TASK_ERROR and synchronizing CoarseMesosSchedulerBackend and MesosSchedulerBackend
Diffstat (limited to 'core')
3 files changed, 5 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/TaskState.scala b/core/src/main/scala/org/apache/spark/TaskState.scala index d85a6d6834..c415fe99b1 100644 --- a/core/src/main/scala/org/apache/spark/TaskState.scala +++ b/core/src/main/scala/org/apache/spark/TaskState.scala @@ -27,6 +27,8 @@ private[spark] object TaskState extends Enumeration { type TaskState = Value + def isFailed(state: TaskState) = (LOST == state) || (FAILED == state) + def isFinished(state: TaskState) = FINISHED_STATES.contains(state) def toMesos(state: TaskState): MesosTaskState = state match { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index fc92b9c35c..e13de0f46e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -277,7 +277,7 @@ private[spark] class CoarseMesosSchedulerBackend( coresByTaskId -= taskId } // If it was a failure, mark the slave as failed for blacklisting purposes - if (state == MesosTaskState.TASK_FAILED || state == MesosTaskState.TASK_LOST) { + if (TaskState.isFailed(TaskState.fromMesos(state))) { failuresBySlaveId(slaveId) = failuresBySlaveId.getOrElse(slaveId, 0) + 1 if (failuresBySlaveId(slaveId) >= MAX_SLAVE_FAILURES) { logInfo("Blacklisting Mesos slave " + slaveId + " due to too many failures; " + diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala index df8f4306b8..06bb527522 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala @@ -318,7 +318,8 @@ private[spark] class MesosSchedulerBackend( val tid = status.getTaskId.getValue.toLong val state = TaskState.fromMesos(status.getState) synchronized { - if (status.getState == MesosTaskState.TASK_LOST && taskIdToSlaveId.contains(tid)) { + if (TaskState.isFailed(TaskState.fromMesos(status.getState)) + && taskIdToSlaveId.contains(tid)) { // We lost the executor on this slave, so remember that it's gone removeExecutor(taskIdToSlaveId(tid), "Lost executor") } |