diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 8dc547b379f50ea574445f6b505d3fa10d301fd1..7df43a555d5626e2effa9e3f98bce576d0e86ee4 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -32,8 +32,8 @@ CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
 
 ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"
 
-if [ -n "${JAVA_HOME}" ]; then
-  JAR_CMD="${JAVA_HOME}/bin/jar"
+if [ -n "$JAVA_HOME" ]; then
+  JAR_CMD="$JAVA_HOME/bin/jar"
 else
   JAR_CMD="jar"
 fi
@@ -52,40 +52,48 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
 
-  DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar`
-  CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
+  ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar 2>/dev/null)
 else
   # Else use spark-assembly jar from either RELEASE or assembly directory
   if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=`ls "$FWDIR"/lib/spark-assembly*hadoop*.jar`
+    ASSEMBLY_JAR=$(ls "$FWDIR"/lib/spark-assembly*hadoop*.jar 2>/dev/null)
   else
-    ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar`
+    ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar 2>/dev/null)
   fi
-  jar_error_check=$($JAR_CMD -tf $ASSEMBLY_JAR org/apache/spark/SparkContext 2>&1)
-  if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
-    echo "Loading Spark jar with '$JAR_CMD' failed. "
-    echo "This is likely because Spark was compiled with Java 7 and run "
-    echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark "
-    echo "or build Spark with Java 6."
-    exit 1
-  fi
-  CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 fi
 
+# Verify that versions of java used to build the jars and run Spark are compatible
+jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
+if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
+  echo "Loading Spark jar with '$JAR_CMD' failed. "
+  echo "This is likely because Spark was compiled with Java 7 and run "
+  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark "
+  echo "or build Spark with Java 6."
+  exit 1
+fi
+
+CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
+
 # When Hive support is needed, Datanucleus jars must be included on the classpath.
-# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost. 
+# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
 # Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
 # built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
 # assembly is built for Hive, before actually populating the CLASSPATH with the jars.
 # Note that this check order is faster (by up to half a second) in the case where Hive is not used.
-num_datanucleus_jars=$(ls "$FWDIR"/lib_managed/jars/ 2>/dev/null | grep "datanucleus-.*\\.jar" | wc -l)
-if [ $num_datanucleus_jars -gt 0 ]; then
-  AN_ASSEMBLY_JAR=${ASSEMBLY_JAR:-$DEPS_ASSEMBLY_JAR}
-  num_hive_files=$(jar tvf "$AN_ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null | wc -l)
-  if [ $num_hive_files -gt 0 ]; then
+if [ -f "$FWDIR/RELEASE" ]; then
+  datanucleus_dir="$FWDIR"/lib
+else
+  datanucleus_dir="$FWDIR"/lib_managed/jars
+fi
+
+datanucleus_jars=$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")
+datanucleus_jars=$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)
+
+if [ -n "$datanucleus_jars" ]; then
+  hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
+  if [ -n "$hive_files" ]; then
     echo "Spark assembly has been built with Hive, including Datanucleus jars on classpath" 1>&2
-    DATANUCLEUSJARS=$(echo "$FWDIR/lib_managed/jars"/datanucleus-*.jar | tr " " :)
-    CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
+    CLASSPATH="$CLASSPATH:$datanucleus_jars"
   fi
 fi
 
@@ -105,10 +113,10 @@ fi
 # Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !
 # Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
 # the configurtion files.
-if [ "x" != "x$HADOOP_CONF_DIR" ]; then
+if [ -n "$HADOOP_CONF_DIR" ]; then
   CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR"
 fi
-if [ "x" != "x$YARN_CONF_DIR" ]; then
+if [ -n "$YARN_CONF_DIR" ]; then
   CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
 fi
diff --git a/make-distribution.sh b/make-distribution.sh
index 2ab64ff4598af4b1b4a9044bc258a55799d03d1e..ff18d01e7a61633de3257bc9552137b77bc57c7c 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -51,13 +51,13 @@ if [ $? != 0 ]; then
     exit -1;
 fi
 
-if [ -z "${JAVA_HOME}" ]; then
+if [ -z "$JAVA_HOME" ]; then
   echo "Error: JAVA_HOME is not set, cannot proceed."
   exit -1
 fi
 
-JAVA_CMD=$JAVA_HOME/bin/java
-JAVA_VERSION=$($JAVA_CMD -version 2>&1)
+JAVA_CMD="$JAVA_HOME"/bin/java
+JAVA_VERSION=$("$JAVA_CMD" -version 2>&1)
 if ! [[ "$JAVA_VERSION" =~ "1.6" ]]; then
   echo "Error: JAVA_HOME must point to a JDK 6 installation (see SPARK-1703)."
   echo "Output from 'java -version' was:"
@@ -162,6 +162,10 @@ echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
 cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
 cp $FWDIR/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
 
+if [ "$SPARK_HIVE" == "true" ]; then
+  cp $FWDIR/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/"
+fi
+
 # Copy other things
 mkdir "$DISTDIR"/conf
 cp "$FWDIR"/conf/*.template "$DISTDIR"/conf
@@ -170,7 +174,6 @@ cp -r "$FWDIR/bin" "$DISTDIR"
 cp -r "$FWDIR/python" "$DISTDIR"
 cp -r "$FWDIR/sbin" "$DISTDIR"
 
-
 # Download and copy in tachyon, if requested
 if [ "$SPARK_TACHYON" == "true" ]; then
   TACHYON_VERSION="0.4.1"