Skip to content
Snippets Groups Projects
Commit c0b4095e authored by Y.CORP.YAHOO.COM\tgraves's avatar Y.CORP.YAHOO.COM\tgraves
Browse files

Change to use Yarn appropriate directories rather than /tmp or the user specified spark.local.dir

parent f9fc5c16
No related branches found
No related tags found
No related merge requests found
...@@ -47,6 +47,9 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e ...@@ -47,6 +47,9 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
private var isFinished:Boolean = false private var isFinished:Boolean = false
def run() { def run() {
// Set up the directories so things go to Yarn approved directories rather
// than the user specified ones and /tmp
System.setProperty("spark.local.dir", getLocalDirs())
appAttemptId = getApplicationAttemptId() appAttemptId = getApplicationAttemptId()
resourceManager = registerWithResourceManager() resourceManager = registerWithResourceManager()
...@@ -89,6 +92,20 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e ...@@ -89,6 +92,20 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
System.exit(0) System.exit(0)
} }
/**
 * Get the Yarn approved local directories.
 *
 * Hadoop 0.23 and 2.x use different environment variable names for the
 * local dirs, so both are consulted: YARN_LOCAL_DIRS (0.23.X) is preferred
 * when it is set, otherwise LOCAL_DIRS (2.X) is used.
 *
 * @return the value of the first of those variables that is set, or the
 *         empty string when neither is present in the environment.
 */
private def getLocalDirs(): String = {
  // Option(...) maps an unset variable (null) to None, so no mutable state
  // or explicit null checks are needed. We assume one of the 2 is set; the
  // empty-string fallback only guards against handing null to the caller.
  Option(System.getenv("YARN_LOCAL_DIRS"))
    .orElse(Option(System.getenv("LOCAL_DIRS")))
    .getOrElse("")
}
private def getApplicationAttemptId(): ApplicationAttemptId = { private def getApplicationAttemptId(): ApplicationAttemptId = {
val envs = System.getenv() val envs = System.getenv()
......
...@@ -221,6 +221,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl ...@@ -221,6 +221,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl
// Add Xmx for am memory // Add Xmx for am memory
JAVA_OPTS += "-Xmx" + amMemory + "m " JAVA_OPTS += "-Xmx" + amMemory + "m "
JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(),
YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR)
// Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out. // Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out.
// The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same // The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same
// node, spark gc effects all other containers performance (which can also be other spark containers) // node, spark gc effects all other containers performance (which can also be other spark containers)
......
...@@ -75,6 +75,10 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S ...@@ -75,6 +75,10 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S
if (env.isDefinedAt("SPARK_JAVA_OPTS")) { if (env.isDefinedAt("SPARK_JAVA_OPTS")) {
JAVA_OPTS += env("SPARK_JAVA_OPTS") + " " JAVA_OPTS += env("SPARK_JAVA_OPTS") + " "
} }
JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(),
YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR)
// Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out. // Commenting it out for now - so that people can refer to the properties if required. Remove it once cpuset version is pushed out.
// The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same // The context is, default gc for server class machines end up using all cores to do gc - hence if there are multiple containers in same
// node, spark gc effects all other containers performance (which can also be other spark containers) // node, spark gc effects all other containers performance (which can also be other spark containers)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment