aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Charles Reiss <charles@eecs.berkeley.edu> 2012-12-13 16:11:08 -0800
committer Charles Reiss <charles@eecs.berkeley.edu> 2012-12-13 16:23:36 -0800
commit a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca (patch)
tree e800cc99497fe7fa611c83e2a94fdfc859bbbb69
parent fa9df4a45daf5fd8b19df20c1fb7466bde3b2054 (diff)
download: spark-a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca.tar.gz
spark-a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca.tar.bz2
spark-a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca.zip
Log duplicate slaveLost() calls in ClusterScheduler.
-rw-r--r-- core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala 8
1 file changed, 7 insertions, 1 deletion
diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
index d160379b14..ab200decb1 100644
--- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
+++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
@@ -254,14 +254,20 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
synchronized {
val host = slaveIdToHost(slaveId)
if (hostsAlive.contains(host)) {
+ logError("Lost an executor on " + host + ": " + reason)
slaveIdsWithExecutors -= slaveId
hostsAlive -= host
activeTaskSetsQueue.foreach(_.hostLost(host))
failedHost = Some(host)
+ } else {
+ // We may get multiple slaveLost() calls with different loss reasons. For example, one
+ // may be triggered by a dropped connection from the slave while another may be a report
+ // of executor termination from Mesos. We produce log messages for both so we eventually
+ // report the termination reason.
+ logError("Lost an executor on " + host + " (already removed): " + reason)
}
}
if (failedHost != None) {
- logError("Lost an executor on " + failedHost.get + ": " + reason)
listener.hostLost(failedHost.get)
backend.reviveOffers()
}