Skip to content
Snippets Groups Projects
Commit a4041dd8 authored by Charles Reiss's avatar Charles Reiss
Browse files

Log duplicate slaveLost() calls in ClusterScheduler.

parent fa9df4a4
No related branches found
No related tags found
No related merge requests found
...@@ -254,14 +254,20 @@ private[spark] class ClusterScheduler(val sc: SparkContext) ...@@ -254,14 +254,20 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
synchronized { synchronized {
val host = slaveIdToHost(slaveId) val host = slaveIdToHost(slaveId)
if (hostsAlive.contains(host)) { if (hostsAlive.contains(host)) {
logError("Lost an executor on " + host + ": " + reason)
slaveIdsWithExecutors -= slaveId slaveIdsWithExecutors -= slaveId
hostsAlive -= host hostsAlive -= host
activeTaskSetsQueue.foreach(_.hostLost(host)) activeTaskSetsQueue.foreach(_.hostLost(host))
failedHost = Some(host) failedHost = Some(host)
} else {
// We may get multiple slaveLost() calls with different loss reasons. For example, one
// may be triggered by a dropped connection from the slave while another may be a report
// of executor termination from Mesos. We produce log messages for both so we eventually
// report the termination reason.
logError("Lost an executor on " + host + " (already removed): " + reason)
} }
} }
if (failedHost != None) { if (failedHost != None) {
logError("Lost an executor on " + failedHost.get + ": " + reason)
listener.hostLost(failedHost.get) listener.hostLost(failedHost.get)
backend.reviveOffers() backend.reviveOffers()
} }
......
0% Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment