From c2c107abad8b462218d33c70b946e840663228a1 Mon Sep 17 00:00:00 2001
From: Mike Ihbe <mikejihbe@gmail.com>
Date: Thu, 20 Oct 2016 09:49:58 +0100
Subject: [PATCH] [SPARK-11653][DEPLOY] Allow spark-daemon.sh to run in the
 foreground

## What changes were proposed in this pull request?

Add a SPARK_NO_DAEMONIZE environment variable flag to spark-daemon.sh that, when set, runs the launched process in the foreground instead of daemonizing it.

It looks like there has been some prior work in https://github.com/apache/spark/pull/3881, but there was some talk about these being refactored. I'm not sure if that happened or not, but that PR is almost 2 years old at this point so it was worth revisiting.

## How was this patch tested?

./dev/run-tests still seems to work. It doesn't look like these scripts have tests, but if I missed them just let me know.

Author: Mike Ihbe <mikejihbe@gmail.com>

Closes #15338 from mikejihbe/SPARK-11653.
---
 conf/spark-env.sh.template |  1 +
 sbin/spark-daemon.sh       | 54 ++++++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index c750c72d19..5c1e876ef9 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -63,3 +63,4 @@
 # - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
 # - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
 # - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
+# - SPARK_NO_DAEMONIZE  If set, run the command in the foreground instead of daemonizing it. No PID file is written.
diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh
index 5982357112..061019a55e 100755
--- a/sbin/spark-daemon.sh
+++ b/sbin/spark-daemon.sh
@@ -27,6 +27,7 @@
 #   SPARK_PID_DIR   The pid files are stored. /tmp by default.
 #   SPARK_IDENT_STRING   A string representing this instance of spark. $USER by default
 #   SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
+#   SPARK_NO_DAEMONIZE   If set, the command is run in the foreground instead of being daemonized. No PID file is written.
 ##
 
 usage="Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>"
@@ -122,6 +123,35 @@ if [ "$SPARK_NICENESS" = "" ]; then
     export SPARK_NICENESS=0
 fi
 
+execute_command() {
+  # Run "$@" under nohup in the background (pid written to $pid, output appended
+  # to $log), or in the foreground with no pid file if SPARK_NO_DAEMONIZE is set.
+  if [ -z "${SPARK_NO_DAEMONIZE+set}" ]; then
+      # "$@" keeps each argument one word; a flattened string would be re-split.
+      nohup -- "$@" >> "$log" 2>&1 < /dev/null &
+      newpid="$!"
+      echo "$newpid" > "$pid"
+      # Poll for up to 5 seconds for the java process to start
+      for i in {1..10}
+      do
+        if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
+           break
+        fi
+        sleep 0.5
+      done
+
+      sleep 2
+      # Check if the process has died; in that case we'll tail the log so the user can see
+      if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
+        echo "failed to launch $*:"
+        tail -2 "$log" | sed 's/^/  /'
+        echo "full log in $log"
+      fi
+  else
+      "$@"
+  fi
+}
+
 run_command() {
   mode="$1"
   shift
@@ -146,13 +176,11 @@ run_command() {
 
   case "$mode" in
     (class)
-      nohup nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class $command "$@" >> "$log" 2>&1 < /dev/null &
-      newpid="$!"
+      execute_command nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class $command $@
       ;;
 
     (submit)
-      nohup nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-submit --class $command "$@" >> "$log" 2>&1 < /dev/null &
-      newpid="$!"
+      execute_command nice -n "$SPARK_NICENESS" bash "${SPARK_HOME}"/bin/spark-submit --class $command $@
       ;;
 
     (*)
@@ -161,24 +189,6 @@ run_command() {
       ;;
   esac
 
-  echo "$newpid" > "$pid"
-  
-  #Poll for up to 5 seconds for the java process to start
-  for i in {1..10}
-  do
-    if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
-       break
-    fi
-    sleep 0.5
-  done
-
-  sleep 2
-  # Check if the process has died; in that case we'll tail the log so the user can see
-  if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
-    echo "failed to launch $command:"
-    tail -2 "$log" | sed 's/^/  /'
-    echo "full log in $log"
-  fi
 }
 
 case $option in
-- 
GitLab