diff options
author | Charles Reiss <charles@eecs.berkeley.edu> | 2012-12-13 16:11:08 -0800 |
---|---|---|
committer | Charles Reiss <charles@eecs.berkeley.edu> | 2012-12-13 16:23:36 -0800 |
commit | a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca (patch) | |
tree | e800cc99497fe7fa611c83e2a94fdfc859bbbb69 /core | |
parent | fa9df4a45daf5fd8b19df20c1fb7466bde3b2054 (diff) | |
download | spark-a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca.tar.gz spark-a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca.tar.bz2 spark-a4041dd87f7b33b28de29ef0a4eebe33c7b0e6ca.zip |
Log duplicate slaveLost() calls in ClusterScheduler.
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala | 8 |
1 file changed, 7 insertions, 1 deletion
```diff
diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
index d160379b14..ab200decb1 100644
--- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
+++ b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala
@@ -254,14 +254,20 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
     synchronized {
       val host = slaveIdToHost(slaveId)
       if (hostsAlive.contains(host)) {
+        logError("Lost an executor on " + host + ": " + reason)
         slaveIdsWithExecutors -= slaveId
         hostsAlive -= host
         activeTaskSetsQueue.foreach(_.hostLost(host))
         failedHost = Some(host)
+      } else {
+        // We may get multiple slaveLost() calls with different loss reasons. For example, one
+        // may be triggered by a dropped connection from the slave while another may be a report
+        // of executor termination from Mesos. We produce log messages for both so we eventually
+        // report the termination reason.
+        logError("Lost an executor on " + host + " (already removed): " + reason)
       }
     }
     if (failedHost != None) {
-      logError("Lost an executor on " + failedHost.get + ": " + reason)
       listener.hostLost(failedHost.get)
       backend.reviveOffers()
     }
```