From bb98ecafce196ecc5bc3a1e4cc9264df7b752c6a Mon Sep 17 00:00:00 2001
From: Aaron Davidson <aaron@databricks.com>
Date: Thu, 15 May 2014 21:37:58 -0700
Subject: [PATCH] SPARK-1860: Do not cleanup application work/ directories by
 default

Enabling cleanup by default causes an unrecoverable error for applications that
run for longer than 7 days and have jars added to the SparkContext, as the jars
are cleaned up even though the application is still running.

Author: Aaron Davidson <aaron@databricks.com>

Closes #800 from aarondav/shitty-defaults and squashes the following commits:

a573fbb [Aaron Davidson] SPARK-1860: Do not cleanup application work/ directories by default
---
 .../main/scala/org/apache/spark/deploy/worker/Worker.scala   | 2 +-
 docs/configuration.md                                        | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index 134624c35a..fb9cc116cd 100755
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -65,7 +65,7 @@ private[spark] class Worker(
   val REGISTRATION_TIMEOUT = 20.seconds
   val REGISTRATION_RETRIES = 3
 
-  val CLEANUP_ENABLED = conf.getBoolean("spark.worker.cleanup.enabled", true)
+  val CLEANUP_ENABLED = conf.getBoolean("spark.worker.cleanup.enabled", false)
   // How often worker will clean up old app folders
   val CLEANUP_INTERVAL_MILLIS = conf.getLong("spark.worker.cleanup.interval", 60 * 30) * 1000
   // TTL for app folders/data;  after TTL expires it will be cleaned up
diff --git a/docs/configuration.md b/docs/configuration.md
index 2eed96f704..f89040d714 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -390,10 +390,11 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td>spark.worker.cleanup.enabled</td>
-  <td>true</td>
+  <td>false</td>
   <td>
     Enable periodic cleanup of worker / application directories.  Note that this only affects standalone
-    mode, as YARN works differently.
+    mode, as YARN works differently. Application directories are cleaned up regardless of whether
+    the application is still running.
   </td>
 </tr>
 <tr>
-- 
GitLab