Skip to content
Snippets Groups Projects
Commit 27b3f3f0 authored by Stephen Haberman's avatar Stephen Haberman
Browse files

Handle slaveLost before slaveIdToHost knows about it.

parent d1de9d7d
No related branches found
No related tags found
No related merge requests found
......@@ -252,19 +252,24 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
def slaveLost(slaveId: String, reason: ExecutorLossReason) {
var failedHost: Option[String] = None
synchronized {
val host = slaveIdToHost(slaveId)
if (hostsAlive.contains(host)) {
logError("Lost an executor on " + host + ": " + reason)
slaveIdsWithExecutors -= slaveId
hostsAlive -= host
activeTaskSetsQueue.foreach(_.hostLost(host))
failedHost = Some(host)
} else {
// We may get multiple slaveLost() calls with different loss reasons. For example, one
// may be triggered by a dropped connection from the slave while another may be a report
// of executor termination from Mesos. We produce log messages for both so we eventually
// report the termination reason.
logError("Lost an executor on " + host + " (already removed): " + reason)
slaveIdToHost.get(slaveId) match {
case Some(host) =>
if (hostsAlive.contains(host)) {
logError("Lost an executor on " + host + ": " + reason)
slaveIdsWithExecutors -= slaveId
hostsAlive -= host
activeTaskSetsQueue.foreach(_.hostLost(host))
failedHost = Some(host)
} else {
// We may get multiple slaveLost() calls with different loss reasons. For example, one
// may be triggered by a dropped connection from the slave while another may be a report
// of executor termination from Mesos. We produce log messages for both so we eventually
// report the termination reason.
logError("Lost an executor on " + host + " (already removed): " + reason)
}
case None =>
// We were told about a slave being lost before we could even allocate work to it
logError("Lost slave " + slaveId + " (no work assigned yet)")
}
}
if (failedHost != None) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment