From a3c51c6ea2320efdeb2a6a5c1cd11d714f8994aa Mon Sep 17 00:00:00 2001
From: Davis Shepherd <davis@conviva.com>
Date: Mon, 7 Apr 2014 10:02:00 -0700
Subject: [PATCH] SPARK-1432: Make sure that all metadata fields are properly
 cleaned

While working on spark-1337 with @pwendell, we noticed that not all of the metadata maps in JobProgessListener were being properly cleaned. This could lead to a (hypothetical) memory leak issue should a job run long enough. This patch aims to address the issue.

Author: Davis Shepherd <davis@conviva.com>

Closes #338 from dgshep/master and squashes the following commits:

a77b65c [Davis Shepherd] In the contex of SPARK-1337: Make sure that all metadata fields are properly cleaned
---
 .../scala/org/apache/spark/ui/jobs/JobProgressListener.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
index cd4be57227..048f671c87 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -83,7 +83,6 @@ private[ui] class JobProgressListener(conf: SparkConf) extends SparkListener {
     if (stages.size > retainedStages) {
       val toRemove = math.max(retainedStages / 10, 1)
       stages.take(toRemove).foreach { s =>
-        stageIdToTaskData.remove(s.stageId)
         stageIdToTime.remove(s.stageId)
         stageIdToShuffleRead.remove(s.stageId)
         stageIdToShuffleWrite.remove(s.stageId)
@@ -92,8 +91,10 @@ private[ui] class JobProgressListener(conf: SparkConf) extends SparkListener {
         stageIdToTasksActive.remove(s.stageId)
         stageIdToTasksComplete.remove(s.stageId)
         stageIdToTasksFailed.remove(s.stageId)
+        stageIdToTaskData.remove(s.stageId)
+        stageIdToExecutorSummaries.remove(s.stageId)
         stageIdToPool.remove(s.stageId)
-        if (stageIdToDescription.contains(s.stageId)) {stageIdToDescription.remove(s.stageId)}
+        stageIdToDescription.remove(s.stageId)
       }
       stages.trimStart(toRemove)
     }
-- 
GitLab