From 6d392b36ee1dc6f7e5198dd436e4e62eb816a072 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Wed, 27 Aug 2014 12:39:21 -0700
Subject: [PATCH] [SPARK-2608][Core] Fixed command line option passing issue
 over Mesos via SPARK_EXECUTOR_OPTS

This is another try after #2145 to fix [SPARK-2608](https://issues.apache.org/jira/browse/SPARK-2608).

The basic idea is to pass `extraJavaOpts` and `extraLibraryPath` together via environment variable `SPARK_EXECUTOR_OPTS`. This variable is recognized by `spark-class` and not used anywhere else. In this way, we still launch Mesos executors with `spark-class`/`spark-executor`, but avoids the executor side Spark home issue.

Quoted string with spaces is not allowed in either `extraJavaOpts` or `extraLibraryPath` when using Spark over Mesos. The reason is that Mesos passes the whole command line as a single string argument to `sh -c` to start the executor, and this makes shell string escaping non-trivial to handle. This should be fixed in a later release.

Classes in package `org.apache.spark.deploy` shouldn't be used as they assume Spark is deployed in standalone mode, and give wrong executor side Spark home directory. Please refer to comments in #2145 for more details.

Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #2161 from liancheng/mesos-fix-with-env-var and squashes the following commits:

ba59190 [Cheng Lian] Added fine grained Mesos executor support
1174076 [Cheng Lian] Draft fix for CoarseMesosSchedulerBackend

(cherry picked from commit 935bffe3bf6c91a42288bff8c1ec69fecb41a769)
Signed-off-by: Reynold Xin <rxin@apache.org>
---
 .../mesos/CoarseMesosSchedulerBackend.scala        | 14 ++++++++++----
 .../cluster/mesos/MesosSchedulerBackend.scala      | 14 ++++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala
index f0172504c5..8c7cb07ebc 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala
@@ -122,6 +122,12 @@ private[spark] class CoarseMesosSchedulerBackend(
     val extraLibraryPath = conf.getOption(libraryPathOption).map(p => s"-Djava.library.path=$p")
     val extraOpts = Seq(extraJavaOpts, extraLibraryPath).flatten.mkString(" ")
 
+    environment.addVariables(
+      Environment.Variable.newBuilder()
+        .setName("SPARK_EXECUTOR_OPTS")
+        .setValue(extraOpts)
+        .build())
+
     sc.executorEnvs.foreach { case (key, value) =>
       environment.addVariables(Environment.Variable.newBuilder()
         .setName(key)
@@ -140,16 +146,16 @@ private[spark] class CoarseMesosSchedulerBackend(
     if (uri == null) {
       val runScript = new File(sparkHome, "./bin/spark-class").getCanonicalPath
       command.setValue(
-        "\"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %s %d".format(
-          runScript, extraOpts, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores))
+        "\"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d".format(
+          runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores))
     } else {
       // Grab everything to the first '.'. We'll use that and '*' to
       // glob the directory "correctly".
       val basename = uri.split('/').last.split('.').head
       command.setValue(
         ("cd %s*; " +
-          "./bin/spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %s %d")
-          .format(basename, extraOpts, driverUrl, offer.getSlaveId.getValue,
+          "./bin/spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d")
+          .format(basename, driverUrl, offer.getSlaveId.getValue,
             offer.getHostname, numCores))
       command.addUris(CommandInfo.URI.newBuilder().setValue(uri))
     }
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index c717e7c621..e84ce094e5 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -90,6 +90,20 @@ private[spark] class MesosSchedulerBackend(
       "Spark home is not set; set it through the spark.home system " +
       "property, the SPARK_HOME environment variable or the SparkContext constructor"))
     val environment = Environment.newBuilder()
+    sc.conf.getOption("spark.executor.extraClassPath").foreach { cp =>
+      environment.addVariables(
+        Environment.Variable.newBuilder().setName("SPARK_CLASSPATH").setValue(cp).build())
+    }
+    val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions")
+    val extraLibraryPath = sc.conf.getOption("spark.executor.extraLibraryPath").map { lp =>
+      s"-Djava.library.path=$lp"
+    }
+    val extraOpts = Seq(extraJavaOpts, extraLibraryPath).flatten.mkString(" ")
+    environment.addVariables(
+      Environment.Variable.newBuilder()
+        .setName("SPARK_EXECUTOR_OPTS")
+        .setValue(extraOpts)
+        .build())
     sc.executorEnvs.foreach { case (key, value) =>
       environment.addVariables(Environment.Variable.newBuilder()
         .setName(key)
-- 
GitLab