aboutsummaryrefslogtreecommitdiff
path: root/resource-managers
diff options
context:
space:
mode:
author Eric Liang <ekl@databricks.com> 2017-03-23 23:30:40 -0700
committer Kay Ousterhout <kayousterhout@gmail.com> 2017-03-23 23:30:44 -0700
commit 8e558041aa0c41ba9fb2ce242daaf6d6ed4d85b7 (patch)
tree 6c35820390b647b95394e2d478f12756f1d3d608 /resource-managers
parent 19596c28b6ef6e7abe0cfccfd2269c2fddf1fdee (diff)
download spark-8e558041aa0c41ba9fb2ce242daaf6d6ed4d85b7.tar.gz
spark-8e558041aa0c41ba9fb2ce242daaf6d6ed4d85b7.tar.bz2
spark-8e558041aa0c41ba9fb2ce242daaf6d6ed4d85b7.zip
[SPARK-19820][CORE] Add interface to kill tasks w/ a reason
This commit adds a killTaskAttempt method to SparkContext, to allow users to kill tasks so that they can be re-scheduled elsewhere. This also refactors the task kill path to allow specifying a reason for the task kill. The reason is propagated opaquely through events, and will show up in the UI automatically as `(N killed: $reason)` and `TaskKilled: $reason`. Without this change, there is no way to give the user feedback through the UI. Currently used reasons are "stage cancelled", "another attempt succeeded", and "killed via SparkContext.killTask". The user can also specify a custom reason through `SparkContext.killTaskAttempt`. cc rxin In the stage overview UI the reasons are summarized: ![1](https://cloud.githubusercontent.com/assets/14922/23929209/a83b2862-08e1-11e7-8b3e-ae1967bbe2e5.png) Within the stage UI you can see individual task kill reasons: ![2](https://cloud.githubusercontent.com/assets/14922/23929200/9a798692-08e1-11e7-8697-72b27ad8a287.png) Tested with the existing tests; also tried killing some stages in the UI and verified that the messages are as expected. Author: Eric Liang <ekl@databricks.com> Author: Eric Liang <ekl@google.com> Closes #17166 from ericl/kill-reason.
Diffstat (limited to 'resource-managers')
-rw-r--r-- resource-managers/mesos/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala | 3
-rw-r--r-- resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala | 3
2 files changed, 4 insertions, 2 deletions
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
index b252539782..a086ec7ea2 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala
@@ -104,7 +104,8 @@ private[spark] class MesosExecutorBackend
logError("Received KillTask but executor was null")
} else {
// TODO: Determine the 'interruptOnCancel' property set for the given job.
- executor.killTask(t.getValue.toLong, interruptThread = false)
+ executor.killTask(
+ t.getValue.toLong, interruptThread = false, reason = "killed by mesos")
}
}
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala
index f198f8893b..735c879c63 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala
@@ -428,7 +428,8 @@ private[spark] class MesosFineGrainedSchedulerBackend(
recordSlaveLost(d, slaveId, ExecutorExited(status, exitCausedByApp = true))
}
- override def killTask(taskId: Long, executorId: String, interruptThread: Boolean): Unit = {
+ override def killTask(
+ taskId: Long, executorId: String, interruptThread: Boolean, reason: String): Unit = {
schedulerDriver.killTask(
TaskID.newBuilder()
.setValue(taskId.toString).build()