From 70d814665baa8b8ca868d3126452105ecfa5cbff Mon Sep 17 00:00:00 2001
From: Tatiana Borisova <tanyatik@yandex.ru>
Date: Thu, 28 Aug 2014 10:36:36 -0700
Subject: [PATCH] [SPARK-3150] Fix NullPointerException in Spark recovery: Add
 initializing default values in DriverInfo.init()

The issue happens when Spark runs standalone on a cluster. When the master
and a driver fail simultaneously on one node of the cluster, the master
tries to recover its state and restart the Spark driver. While restarting
the driver, it fails with an NPE (see the stack trace in the JIRA ticket),
then restarts, tries to recover its state, and restarts the Spark driver
again, over and over in an infinite cycle. Specifically, Spark reads the
DriverInfo state from ZooKeeper, but after deserialization
DriverInfo.worker turns out to be null.

https://issues.apache.org/jira/browse/SPARK-3150

Author: Tatiana Borisova <tanyatik@yandex.ru>

Closes #2062 from tanyatik/spark-3150 and squashes the following commits:

9936043 [Tatiana Borisova] Add initializing default values in DriverInfo.init()
---
 .../org/apache/spark/deploy/master/DriverInfo.scala | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala
index 33377931d6..80b570a44a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala
@@ -33,4 +33,17 @@ private[spark] class DriverInfo(
   @transient var exception: Option[Exception] = None
   /* Most recent worker assigned to this driver */
   @transient var worker: Option[WorkerInfo] = None
+
+  init()
+
+  private def readObject(in: java.io.ObjectInputStream): Unit = {
+    in.defaultReadObject()
+    init()
+  }
+
+  private def init(): Unit = {
+    state = DriverState.SUBMITTED
+    worker = None
+    exception = None
+  }
 }
--
GitLab
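
For context on why the fix takes this shape: Java serialization skips
@transient fields, so after a deserialization round-trip they come back as
null rather than as their declared defaults, and the first access through
such a field throws an NPE. Below is a minimal, self-contained Scala sketch
of that failure mode and of the readObject-based remedy the patch applies;
the Example and Demo names are illustrative, not Spark code.

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream,
      ObjectInputStream, ObjectOutputStream}

    // Transient fields are skipped by Java serialization, so a round-trip
    // leaves them null instead of restoring their declared defaults.
    class Example extends Serializable {
      @transient var worker: Option[String] = None

      // Same remedy as the patch: reinitialize transient fields on read.
      private def readObject(in: ObjectInputStream): Unit = {
        in.defaultReadObject()
        worker = None
      }
    }

    object Demo extends App {
      val buf = new ByteArrayOutputStream()
      val out = new ObjectOutputStream(buf)
      out.writeObject(new Example)
      out.close()

      val in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray))
      val restored = in.readObject().asInstanceOf[Example]
      // Without the readObject hook above, restored.worker would be null
      // here, and any method call on it would throw the NPE from SPARK-3150.
      println(restored.worker) // prints: None
    }

The patch factors the reinitialization into init() and calls it both from
the constructor and from readObject, so the defaults stay defined in one
place for fresh and deserialized DriverInfo instances alike.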