diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 9178b852e6bfaea25ee794b16dde7b1b82d8eff4..cf38188c4b255e793d4b149d2adecf0b23c4eab5 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -28,30 +28,27 @@ set FWDIR=%~dp0..\

 rem Load environment variables from conf\spark-env.cmd, if it exists
 if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

-set CORE_DIR=%FWDIR%core
-set REPL_DIR=%FWDIR%repl
-set EXAMPLES_DIR=%FWDIR%examples
-set BAGEL_DIR=%FWDIR%bagel
-set MLLIB_DIR=%FWDIR%mllib
-set TOOLS_DIR=%FWDIR%tools
-set YARN_DIR=%FWDIR%yarn
-set STREAMING_DIR=%FWDIR%streaming
-set PYSPARK_DIR=%FWDIR%python
-
 rem Build up classpath
-set CLASSPATH=%SPARK_CLASSPATH%;%MESOS_CLASSPATH%;%FWDIR%conf;%CORE_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%CORE_DIR%\target\scala-%SCALA_VERSION%\test-classes;%CORE_DIR%\src\main\resources
-set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\classes;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\test-classes
-set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\lib\org\apache\kafka\kafka\0.7.2-spark\*
-set CLASSPATH=%CLASSPATH%;%REPL_DIR%\target\scala-%SCALA_VERSION%\classes;%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\jars\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
-set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%TOOLS_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%YARN_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%SPARK_CLASSPATH%;%FWDIR%conf
+if exist "%FWDIR%RELEASE" (
+  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+) else (
+  for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+)
+set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
+
+if "x%SPARK_TESTING%"=="x1" (
+  rem Add test classes to path
+  set CLASSPATH=%CLASSPATH%;%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
+  set CLASSPATH=%CLASSPATH%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
+  set CLASSPATH=%CLASSPATH%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
+  set CLASSPATH=%CLASSPATH%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
+  set CLASSPATH=%CLASSPATH%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
+)
 rem Add hadoop conf dir - else FileSystem.*, etc fail
 rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
@@ -64,9 +61,6 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
   set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
 :no_yarn_conf_dir

-rem Add Scala standard library
-set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar
-
 rem A bit of a hack to allow calling this script within run2.cmd without seeing output
 if "%DONT_PRINT_CLASSPATH%"=="1" goto exit

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index faf0c2362a24b4fd13e7e8e81964e5c5d3f71778..f2641851cbe7c77d262613e788f52dc6eff5e04c 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -631,20 +631,26 @@ class SparkContext(
    * filesystems), or an HTTP, HTTPS or FTP URI.
    */
   def addJar(path: String) {
-    if (null == path) {
+    if (path == null) {
       logWarning("null specified as parameter to addJar",
         new SparkException("null specified as parameter to addJar"))
     } else {
-      val env = SparkEnv.get
-      val uri = new URI(path)
-      val key = uri.getScheme match {
-        case null | "file" =>
-          if (env.hadoop.isYarnMode()) {
-            logWarning("local jar specified as parameter to addJar under Yarn mode")
-            return
-          }
-          env.httpFileServer.addJar(new File(uri.getPath))
-        case _ => path
+      var key = ""
+      if (path.contains("\\")) {
+        // For local paths with backslashes on Windows, URI throws an exception
+        key = env.httpFileServer.addJar(new File(path))
+      } else {
+        val uri = new URI(path)
+        key = uri.getScheme match {
+          case null | "file" =>
+            if (env.hadoop.isYarnMode()) {
+              logWarning("local jar specified as parameter to addJar under Yarn mode")
+              return
+            }
+            env.httpFileServer.addJar(new File(uri.getPath))
+          case _ =>
+            path
+        }
       }
       addedJars(key) = System.currentTimeMillis
       logInfo("Added JAR " + path + " at " + key + " with timestamp " + addedJars(key))
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 9ab6ba0830f470e67a57fe10ed6ab2635b3a62a3..994a96f2c99310c3d68dd239a03c9b6eeb070b4b 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -9,7 +9,7 @@ In addition to running on the Mesos or YARN cluster managers, Spark also provide

 You can start a standalone master server by executing:

-    ./spark-class spark.deploy.master.Master
+    ./spark-class org.apache.spark.deploy.master.Master

 Once started, the master will print out a `spark://HOST:PORT` URL for itself, which you can use to connect workers to it,
 or pass as the "master" argument to `SparkContext`. You can also find this URL on
@@ -17,7 +17,7 @@ the master's web UI, which is [http://localhost:8080](http://localhost:8080) by

 Similarly, you can start one or more workers and connect them to the master via:

-    ./spark-class spark.deploy.worker.Worker spark://IP:PORT
+    ./spark-class org.apache.spark.deploy.worker.Worker spark://IP:PORT

 Once you have started a worker, look at the master's web UI ([http://localhost:8080](http://localhost:8080) by default).
 You should see the new node listed there, along with its number of CPUs and memory (minus one gigabyte left for the OS).
diff --git a/run-example.cmd b/run-example.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..5b2d048d6ed50a5da9e073ee5a2ef4b4a60fa1b9
--- /dev/null
+++ b/run-example.cmd
@@ -0,0 +1,23 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running a Spark example. To avoid polluting
+rem the environment, it just launches a new cmd to do the real work.
+
+cmd /V /E /C %~dp0run-example2.cmd %*
diff --git a/run-example2.cmd b/run-example2.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..dbb371ecfc21e2f27aecd9bebef5713970f47b0f
--- /dev/null
+++ b/run-example2.cmd
@@ -0,0 +1,61 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+set SCALA_VERSION=2.9.3
+
+rem Figure out where the Spark framework is installed
+set FWDIR=%~dp0
+
+rem Export this as SPARK_HOME
+set SPARK_HOME=%FWDIR%
+
+rem Load environment variables from conf\spark-env.cmd, if it exists
+if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
+
+rem Test that an argument was given
+if not "x%1"=="x" goto arg_given
+  echo Usage: run-example ^<example-class^> [^<args^>]
+  goto exit
+:arg_given
+
+set EXAMPLES_DIR=%FWDIR%examples
+
+rem Figure out the JAR file that our examples were packaged into.
+set SPARK_EXAMPLES_JAR=
+for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
+  set SPARK_EXAMPLES_JAR=%%d
+)
+if "x%SPARK_EXAMPLES_JAR%"=="x" (
+  echo Failed to find Spark examples assembly JAR.
+  echo You need to build Spark with sbt\sbt assembly before running this program.
+  goto exit
+)
+
+rem Compute Spark classpath using external script
+set DONT_PRINT_CLASSPATH=1
+call "%FWDIR%bin\compute-classpath.cmd"
+set DONT_PRINT_CLASSPATH=0
+set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
+
+rem Figure out where java is.
+set RUNNER=java
+if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
+
+"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
+:exit
diff --git a/sbt/sbt.cmd b/sbt/sbt.cmd
index 56a16d77f21421ed075d0c65bb2bff97ed135d6e..681fe00f9210818221e94afcc11db5256ae54ac3 100644
--- a/sbt/sbt.cmd
+++ b/sbt/sbt.cmd
@@ -22,4 +22,4 @@ if not "%MESOS_HOME%x"=="x" set EXTRA_ARGS=-Djava.library.path=%MESOS_HOME%\lib\

 set SPARK_HOME=%~dp0..

-java -Xmx1200M -XX:MaxPermSize=200m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*"
+java -Xmx1200M -XX:MaxPermSize=200m -XX:ReservedCodeCacheSize=256m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*"
diff --git a/spark-class b/spark-class
index 71dfb9c4e6898d50502091a1f6af10b71a3800fa..037abda3b710ab260d385103b7579befc1be1164 100755
--- a/spark-class
+++ b/spark-class
@@ -31,7 +31,7 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then
 fi

 if [ -z "$1" ]; then
-  echo "Usage: run <spark-class> [<args>]" >&2
+  echo "Usage: spark-class <class> [<args>]" >&2
   exit 1
 fi

diff --git a/run.cmd b/spark-class.cmd
similarity index 81%
rename from run.cmd
rename to spark-class.cmd
index c91764e617933e6cb37a69b543d5dbe2e16e9383..19850db9e1e5dd813d4fbdffbf24b427c154eb01 100644
--- a/run.cmd
+++ b/spark-class.cmd
@@ -17,4 +17,7 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem

-cmd /V /E /C %~dp0run2.cmd %*
+rem This is the entry point for running a Spark class. To avoid polluting
+rem the environment, it just launches a new cmd to do the real work.
+
+cmd /V /E /C %~dp0spark-class2.cmd %*
diff --git a/run2.cmd b/spark-class2.cmd
similarity index 58%
rename from run2.cmd
rename to spark-class2.cmd
index dc5e50931e38e69f0c8160ea709f162c2a3b6b77..d4d853e8ad930931e7ddf2295c6c1448846a29e0 100644
--- a/run2.cmd
+++ b/spark-class2.cmd
@@ -30,7 +30,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

 rem Test that an argument was given
 if not "x%1"=="x" goto arg_given
-  echo Usage: run ^<spark-class^> [^<args^>]
+  echo Usage: spark-class ^<class^> [^<args^>]
   goto exit
 :arg_given

@@ -44,12 +44,6 @@ rem Do not overwrite SPARK_JAVA_OPTS environment variable in this script
 if "%RUNNING_DAEMON%"=="0" set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS%
 if "%RUNNING_DAEMON%"=="1" set OUR_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS%

-rem Check that SCALA_HOME has been specified
-if not "x%SCALA_HOME%"=="x" goto scala_exists
-  echo SCALA_HOME is not set
-  goto exit
-:scala_exists
-
 rem Figure out how much memory to use per executor and set it as an environment
 rem variable so that our process sees it and can report it to Mesos
 if "x%SPARK_MEM%"=="x" set SPARK_MEM=512m
@@ -58,43 +52,27 @@ rem Set JAVA_OPTS to be able to load native libraries and to set heap size
 set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM%
 rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!

-set CORE_DIR=%FWDIR%core
-set EXAMPLES_DIR=%FWDIR%examples
-set REPL_DIR=%FWDIR%repl
+rem Test whether the user has built Spark
+if exist "%FWDIR%RELEASE" goto skip_build_test
+set FOUND_JAR=0
+for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+  set FOUND_JAR=1
+)
+if "%FOUND_JAR%"=="0" (
+  echo Failed to find Spark assembly JAR.
+  echo You need to build Spark with sbt\sbt assembly before running this program.
+  goto exit
+)
+:skip_build_test

 rem Compute classpath using external script
 set DONT_PRINT_CLASSPATH=1
 call "%FWDIR%bin\compute-classpath.cmd"
 set DONT_PRINT_CLASSPATH=0

-rem Figure out the JAR file that our examples were packaged into.
-rem First search in the build path from SBT:
-for %%d in ("examples/target/scala-%SCALA_VERSION%/spark-examples*.jar") do (
-  set SPARK_EXAMPLES_JAR=examples/target/scala-%SCALA_VERSION%/%%d
-)
-rem Then search in the build path from Maven:
-for %%d in ("examples/target/spark-examples*hadoop*.jar") do (
-  set SPARK_EXAMPLES_JAR=examples/target/%%d
-)
-
-rem Figure out whether to run our class with java or with the scala launcher.
-rem In most cases, we'd prefer to execute our process with java because scala
-rem creates a shell script as the parent of its Java process, which makes it
-rem hard to kill the child with stuff like Process.destroy(). However, for
-rem the Spark shell, the wrapper is necessary to properly reset the terminal
-rem when we exit, so we allow it to set a variable to launch with scala.
-if "%SPARK_LAUNCH_WITH_SCALA%" NEQ 1 goto java_runner
-  set RUNNER=%SCALA_HOME%\bin\scala
-  # Java options will be passed to scala as JAVA_OPTS
-  set EXTRA_ARGS=
-  goto run_spark
-:java_runner
-  set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar
-  set RUNNER=java
-  if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
-  rem The JVM doesn't read JAVA_OPTS by default so we need to pass it in
-  set EXTRA_ARGS=%JAVA_OPTS%
-:run_spark
+rem Figure out where java is.
+set RUNNER=java
+if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java

-"%RUNNER%" -cp "%CLASSPATH%" %EXTRA_ARGS% %*
+"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
 :exit
diff --git a/spark-shell.cmd b/spark-shell.cmd
index b9b4d4bfb2b760049d54bb5d8458de65e14484a5..3e52bf835e5beac773336bf3ec160e3ad0a5d703 100644
--- a/spark-shell.cmd
+++ b/spark-shell.cmd
@@ -18,5 +18,5 @@ rem limitations under the License.
 rem

 set FWDIR=%~dp0
-set SPARK_LAUNCH_WITH_SCALA=1
-cmd /V /E /C %FWDIR%run2.cmd org.apache.spark.repl.Main %*
+
+cmd /V /E /C %FWDIR%spark-class2.cmd org.apache.spark.repl.Main %*
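For context on the `SparkContext.addJar` hunk above: `java.net.URI` rejects Windows-style paths containing backslashes (e.g. `C:\libs\app.jar`) with a `URISyntaxException`, which is why the patch checks for a backslash before attempting URI parsing. The sketch below is a minimal standalone illustration of that decision logic under stated assumptions, not Spark's actual implementation; `serveLocalJar` and the URL it returns are hypothetical stand-ins for `env.httpFileServer.addJar`.

```scala
import java.io.File
import java.net.URI

object AddJarPathSketch {
  // Hypothetical stand-in for env.httpFileServer.addJar: pretend to serve a local
  // jar over HTTP and return the URL executors would fetch it from.
  def serveLocalJar(file: File): String =
    "http://driver-host:3030/jars/" + file.getName

  // Mirrors the decision in the patched addJar: Windows paths such as
  // C:\libs\app.jar contain backslashes, which java.net.URI rejects, so they are
  // handed straight to File instead of being parsed as a URI.
  def resolveJarKey(path: String): String =
    if (path.contains("\\")) {
      serveLocalJar(new File(path))
    } else {
      val uri = new URI(path)
      uri.getScheme match {
        case null | "file" => serveLocalJar(new File(uri.getPath))
        case _             => path   // hdfs://, http://, ftp:// URIs pass through untouched
      }
    }

  def main(args: Array[String]): Unit = {
    println(resolveJarKey("C:\\libs\\app.jar"))           // Windows local path: served by the driver
    println(resolveJarKey("/opt/libs/app.jar"))           // POSIX local path: served by the driver
    println(resolveJarKey("hdfs://nn:8020/libs/app.jar")) // remote URI: returned as-is
  }
}
```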