diff --git a/assembly/pom.xml b/assembly/pom.xml index 477d4931c3a88184fb57d1c9106b5526a60da4d3..22cbac06cad6198b18e3f09ef616e5f7f55b0c53 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -33,9 +33,8 @@ <properties> <sbt.project.name>assembly</sbt.project.name> - <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir> - <spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename> - <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar> + <build.testJarPhase>none</build.testJarPhase> + <build.copyDependenciesPhase>package</build.copyDependenciesPhase> </properties> <dependencies> @@ -69,6 +68,17 @@ <artifactId>spark-repl_${scala.binary.version}</artifactId> <version>${project.version}</version> </dependency> + + <!-- + Because we don't shade dependencies anymore, we need to restore Guava to compile scope so + that the libraries Spark depends on have it available. We'll package the version that Spark + uses (14.0.1), which is not the same version the Hadoop dependencies use, but it works. + --> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <scope>${hadoop.deps.scope}</scope> + </dependency> </dependencies> <build> @@ -87,75 +97,26 @@ <skip>true</skip> </configuration> </plugin> - <!-- zip pyspark archives to run python application on yarn mode --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-antrun-plugin</artifactId> - <executions> - <execution> - <phase>package</phase> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - <configuration> - <target> - <delete dir="${basedir}/../python/lib/pyspark.zip"/> - <zip destfile="${basedir}/../python/lib/pyspark.zip"> - <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/> - </zip> - </target> - </configuration> - </plugin> - <!-- Use the shade plugin to create a big JAR with all the dependencies --> + <!-- zip pyspark archives to run python application on yarn mode --> <plugin> <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <configuration> - <shadedArtifactAttached>false</shadedArtifactAttached> - <outputFile>${spark.jar}</outputFile> - <artifactSet> - <includes> - <include>*:*</include> - </includes> - </artifactSet> - <filters> - <filter> - <artifact>*:*</artifact> - <excludes> - <exclude>org/datanucleus/**</exclude> - <exclude>META-INF/*.SF</exclude> - <exclude>META-INF/*.DSA</exclude> - <exclude>META-INF/*.RSA</exclude> - </excludes> - </filter> - </filters> - </configuration> - <executions> - <execution> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - <configuration> - <transformers> - <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> - <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> - <resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource> - </transformer> - <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> - <resource>reference.conf</resource> - </transformer> - <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer"> - <resource>log4j.properties</resource> - </transformer> - <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/> - <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"/> - </transformers> - 
</configuration> - </execution> - </executions> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + <configuration> + <target> + <delete dir="${basedir}/../python/lib/pyspark.zip"/> + <zip destfile="${basedir}/../python/lib/pyspark.zip"> + <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/> + </zip> + </target> + </configuration> </plugin> </plugins> </build> diff --git a/bin/spark-class b/bin/spark-class index e710e388be1bc15aa35427e643af76035e10510b..b489591778cb535cb2244efd7fae2830e6c4a3a6 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -36,21 +36,20 @@ else fi # Find Spark jars. -# TODO: change the directory name when Spark jars move from "lib". if [ -f "${SPARK_HOME}/RELEASE" ]; then - SPARK_JARS_DIR="${SPARK_HOME}/lib" + SPARK_JARS_DIR="${SPARK_HOME}/jars" else - SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION" + SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars" fi -if [ ! -d "$SPARK_JARS_DIR" ]; then +if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2 echo "You need to build Spark before running this program." 1>&2 exit 1 +else + LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*" fi -LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*" - # Add the launcher build dir to the classpath if requested. if [ -n "$SPARK_PREPEND_CLASSES" ]; then LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH" diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd index 565b87c102b19669e0c57d62b1362434bd7f9a20..579efff909535df5dea50a6959116ae4caa4819f 100644 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -29,11 +29,10 @@ if "x%1"=="x" ( ) rem Find Spark jars. -rem TODO: change the directory name when Spark jars move from "lib". 
if exist "%SPARK_HOME%\RELEASE" ( - set SPARK_JARS_DIR="%SPARK_HOME%\lib" + set SPARK_JARS_DIR="%SPARK_HOME%\jars" ) else ( - set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%" + set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars" ) if not exist "%SPARK_JARS_DIR%"\ ( diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 50bcf8580597f8c440850d3e08a42138f17b2e0e..c304629bcdbe9128bf7ac85c61b7115ae733e72a 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1121,9 +1121,9 @@ private[spark] object Utils extends Logging { extraEnvironment: Map[String, String] = Map.empty, redirectStderr: Boolean = true): String = { val process = executeCommand(command, workingDir, extraEnvironment, redirectStderr) - val output = new StringBuffer + val output = new StringBuilder val threadName = "read stdout for " + command(0) - def appendToOutput(s: String): Unit = output.append(s) + def appendToOutput(s: String): Unit = output.append(s).append("\n") val stdoutThread = processStreamByLine(threadName, process.getInputStream, appendToOutput) val exitCode = process.waitFor() stdoutThread.join() // Wait for it to finish reading output diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala index 280e4964980d986d4162c69ad14fc140064410ee..4fa9f9a8f590f89f3ecbb56bded468f888f2609a 100644 --- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala @@ -201,24 +201,29 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging { // Make sure only logging errors val logger = Logger.getRootLogger + val oldLogLevel = logger.getLevel logger.setLevel(Level.ERROR) - logger.addAppender(mockAppender) + try { + logger.addAppender(mockAppender) - val testOutputStream = new PipedOutputStream() - val testInputStream = new PipedInputStream(testOutputStream) + val testOutputStream = new PipedOutputStream() + val testInputStream = new PipedInputStream(testOutputStream) - // Close the stream before appender tries to read will cause an IOException - testInputStream.close() - testOutputStream.close() - val appender = FileAppender(testInputStream, testFile, new SparkConf) + // Close the stream before appender tries to read will cause an IOException + testInputStream.close() + testOutputStream.close() + val appender = FileAppender(testInputStream, testFile, new SparkConf) - appender.awaitTermination() + appender.awaitTermination() - // If InputStream was closed without first stopping the appender, an exception will be logged - verify(mockAppender, atLeast(1)).doAppend(loggingEventCaptor.capture) - val loggingEvent = loggingEventCaptor.getValue - assert(loggingEvent.getThrowableInformation !== null) - assert(loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException]) + // If InputStream was closed without first stopping the appender, an exception will be logged + verify(mockAppender, atLeast(1)).doAppend(loggingEventCaptor.capture) + val loggingEvent = loggingEventCaptor.getValue + assert(loggingEvent.getThrowableInformation !== null) + assert(loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException]) + } finally { + logger.setLevel(oldLogLevel) + } } test("file appender async close stream gracefully") { @@ 
-228,30 +233,35 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging { // Make sure only logging errors val logger = Logger.getRootLogger + val oldLogLevel = logger.getLevel logger.setLevel(Level.ERROR) - logger.addAppender(mockAppender) + try { + logger.addAppender(mockAppender) - val testOutputStream = new PipedOutputStream() - val testInputStream = new PipedInputStream(testOutputStream) with LatchedInputStream + val testOutputStream = new PipedOutputStream() + val testInputStream = new PipedInputStream(testOutputStream) with LatchedInputStream - // Close the stream before appender tries to read will cause an IOException - testInputStream.close() - testOutputStream.close() - val appender = FileAppender(testInputStream, testFile, new SparkConf) + // Close the stream before appender tries to read will cause an IOException + testInputStream.close() + testOutputStream.close() + val appender = FileAppender(testInputStream, testFile, new SparkConf) - // Stop the appender before an IOException is called during read - testInputStream.latchReadStarted.await() - appender.stop() - testInputStream.latchReadProceed.countDown() + // Stop the appender before an IOException is called during read + testInputStream.latchReadStarted.await() + appender.stop() + testInputStream.latchReadProceed.countDown() - appender.awaitTermination() + appender.awaitTermination() - // Make sure no IOException errors have been logged as a result of appender closing gracefully - verify(mockAppender, atLeast(0)).doAppend(loggingEventCaptor.capture) - import scala.collection.JavaConverters._ - loggingEventCaptor.getAllValues.asScala.foreach { loggingEvent => - assert(loggingEvent.getThrowableInformation === null - || !loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException]) + // Make sure no IOException errors have been logged as a result of appender closing gracefully + verify(mockAppender, atLeast(0)).doAppend(loggingEventCaptor.capture) + import scala.collection.JavaConverters._ + loggingEventCaptor.getAllValues.asScala.foreach { loggingEvent => + assert(loggingEvent.getThrowableInformation === null + || !loggingEvent.getThrowableInformation.getThrowable.isInstanceOf[IOException]) + } + } finally { + logger.setLevel(oldLogLevel) } } diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2 index 3865a9fb1612884562e210e95ac92ef8234ae96f..2c24366cc3a15ec9c28859af48c65fa6d01c6621 100644 --- a/dev/deps/spark-deps-hadoop-2.2 +++ b/dev/deps/spark-deps-hadoop-2.2 @@ -12,7 +12,6 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7-tests.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar bonecp-0.8.0.RELEASE.jar @@ -61,6 +60,7 @@ grizzly-http-2.1.2.jar grizzly-http-server-2.1.2.jar grizzly-http-servlet-2.1.2.jar grizzly-rcm-2.1.2.jar +guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar hadoop-annotations-2.2.0.jar @@ -164,7 +164,6 @@ scala-parser-combinators_2.11-1.0.4.jar scala-reflect-2.11.8.jar scala-xml_2.11-1.0.2.jar scalap-2.11.8.jar -servlet-api-2.5.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar snappy-0.2.jar @@ -177,7 +176,6 @@ stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar univocity-parsers-1.5.6.jar -unused-1.0.0.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index 4313799da783038c8f3b91a79ee01b464c41f4ba..e9cb0d8f3eac267d62fcd9774cbc8140aa135e2b 100644 --- a/dev/deps/spark-deps-hadoop-2.3 +++ 
b/dev/deps/spark-deps-hadoop-2.3 @@ -12,7 +12,6 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7-tests.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar @@ -56,6 +55,7 @@ eigenbase-properties-1.1.5.jar geronimo-annotation_1.0_spec-1.1.1.jar geronimo-jaspic_1.0_spec-1.0.jar geronimo-jta_1.1_spec-1.1.1.jar +guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar hadoop-annotations-2.3.0.jar @@ -155,7 +155,6 @@ scala-parser-combinators_2.11-1.0.4.jar scala-reflect-2.11.8.jar scala-xml_2.11-1.0.2.jar scalap-2.11.8.jar -servlet-api-2.5.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar snappy-0.2.jar @@ -168,7 +167,6 @@ stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar univocity-parsers-1.5.6.jar -unused-1.0.0.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index 910ea685f26fd22d2954e404292ee66e84abda86..d8d1840da5531375cf455956b41c368cb52ff87a 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -12,7 +12,6 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7-tests.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar @@ -56,6 +55,7 @@ eigenbase-properties-1.1.5.jar geronimo-annotation_1.0_spec-1.1.1.jar geronimo-jaspic_1.0_spec-1.0.jar geronimo-jta_1.1_spec-1.1.1.jar +guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar hadoop-annotations-2.4.0.jar @@ -156,7 +156,6 @@ scala-parser-combinators_2.11-1.0.4.jar scala-reflect-2.11.8.jar scala-xml_2.11-1.0.2.jar scalap-2.11.8.jar -servlet-api-2.5.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar snappy-0.2.jar @@ -169,7 +168,6 @@ stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar univocity-parsers-1.5.6.jar -unused-1.0.0.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 0692f24e47db5c3fe24bed37037adfe52a0bc439..8beede1e38d28ef7e0932fdb8146204ca3b99d0c 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -16,7 +16,6 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7-tests.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar @@ -61,6 +60,7 @@ geronimo-annotation_1.0_spec-1.1.1.jar geronimo-jaspic_1.0_spec-1.0.jar geronimo-jta_1.1_spec-1.1.1.jar gson-2.2.4.jar +guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar hadoop-annotations-2.6.0.jar @@ -162,7 +162,6 @@ scala-parser-combinators_2.11-1.0.4.jar scala-reflect-2.11.8.jar scala-xml_2.11-1.0.2.jar scalap-2.11.8.jar -servlet-api-2.5.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar snappy-0.2.jar @@ -175,7 +174,6 @@ stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar univocity-parsers-1.5.6.jar -unused-1.0.0.jar xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index e397558e058d95812bdbe118eb59c2b17da60130..a9d814f944872a5d27ae80a051891369080e33ce 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -16,7 +16,6 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7-tests.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar @@ -61,6 +60,7 @@ geronimo-annotation_1.0_spec-1.1.1.jar geronimo-jaspic_1.0_spec-1.0.jar geronimo-jta_1.1_spec-1.1.1.jar gson-2.2.4.jar +guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar 
hadoop-annotations-2.7.0.jar @@ -163,7 +163,6 @@ scala-parser-combinators_2.11-1.0.4.jar scala-reflect-2.11.8.jar scala-xml_2.11-1.0.2.jar scalap-2.11.8.jar -servlet-api-2.5.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar snappy-0.2.jar @@ -176,7 +175,6 @@ stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar univocity-parsers-1.5.6.jar -unused-1.0.0.jar xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index dbdd42ff9e0872e357deff64b33af2edabea323a..4f7544f6ea78b3e7cf026ef8ec0488db2f46169b 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -160,28 +160,35 @@ echo -e "\$ ${BUILD_COMMAND[@]}\n" # Make directories rm -rf "$DISTDIR" -mkdir -p "$DISTDIR/lib" +mkdir -p "$DISTDIR/jars" echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE" echo "Build flags: $@" >> "$DISTDIR/RELEASE" # Copy jars -cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/" -# This will fail if the -Pyarn profile is not provided -# In this case, silence the error and ignore the return code of this command -cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || : +cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/" + +# Only create the yarn directory if the yarn artifacts were built. +if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then + mkdir "$DISTDIR"/yarn + cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/yarn" +fi # Copy examples and dependencies mkdir -p "$DISTDIR/examples/jars" cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars" +# Deduplicate jars that have already been packaged as part of the main Spark dependencies. +for f in "$DISTDIR/examples/jars/"*; do + name=$(basename "$f") + if [ -f "$DISTDIR/jars/$name" ]; then + rm "$DISTDIR/examples/jars/$name" + fi +done + # Copy example sources (needed for python and SQL) mkdir -p "$DISTDIR/examples/src/main" cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/" -if [ "$SPARK_HIVE" == "1" ]; then - cp "$SPARK_HOME"/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/" -fi - # Copy license and ASF files cp "$SPARK_HOME/LICENSE" "$DISTDIR" cp -r "$SPARK_HOME/licenses" "$DISTDIR" diff --git a/dev/mima b/dev/mima index ea746e6f01b494a350bd089f33da6fc7eb207e7f..c3553490451c8bdce467cc216a6711562ab6ab97 100755 --- a/dev/mima +++ b/dev/mima @@ -25,8 +25,8 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)" cd "$FWDIR" SPARK_PROFILES="-Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive" -TOOLS_CLASSPATH="$(build/sbt "export tools/fullClasspath" | tail -n1)" -OLD_DEPS_CLASSPATH="$(build/sbt $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)" +TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)" +OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)" rm -f .generated-mima* @@ -36,7 +36,7 @@ java \ -cp "$TOOLS_CLASSPATH:$OLD_DEPS_CLASSPATH" \ org.apache.spark.tools.GenerateMIMAIgnore -echo -e "q\n" | build/sbt mimaReportBinaryIssues | grep -v -e "info.*Resolving" +echo -e "q\n" | build/sbt -DcopyDependencies=false "$@" mimaReportBinaryIssues | grep -v -e "info.*Resolving" ret_val=$?
if [ $ret_val != 0 ]; then diff --git a/dev/run-tests.py b/dev/run-tests.py index c2944747ee5ca340370e228891012a3ed86389df..cbe347274e62cd1664745b3615711506357274a5 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -350,7 +350,7 @@ def build_spark_sbt(hadoop_version): def build_spark_assembly_sbt(hadoop_version): # Enable all of the profiles for the build: build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags - sbt_goals = ["assembly/assembly"] + sbt_goals = ["assembly/package"] profiles_and_goals = build_profiles + sbt_goals print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ", " ".join(profiles_and_goals)) @@ -371,9 +371,10 @@ def build_apache_spark(build_tool, hadoop_version): build_spark_sbt(hadoop_version) -def detect_binary_inop_with_mima(): +def detect_binary_inop_with_mima(hadoop_version): + build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA") - run_cmd([os.path.join(SPARK_HOME, "dev", "mima")]) + run_cmd([os.path.join(SPARK_HOME, "dev", "mima")] + build_profiles) def run_scala_tests_maven(test_profiles): @@ -571,8 +572,8 @@ def main(): # backwards compatibility checks if build_tool == "sbt": # Note: compatibility tests only supported in sbt for now - detect_binary_inop_with_mima() - # Since we did not build assembly/assembly before running dev/mima, we need to + detect_binary_inop_with_mima(hadoop_version) + # Since we did not build assembly/package before running dev/mima, we need to # do it here because the tests still rely on it; see SPARK-13294 for details. build_spark_assembly_sbt(hadoop_version) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 2fdc97f8a02dc56249d9c0e5658a4615529d57fb..274a8edb0c773adea4558f31367795087e946bb8 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1687,12 +1687,7 @@ on all of the worker nodes, as they will need access to the Hive serialization a (SerDes) in order to access data stored in Hive. Configuration of Hive is done by placing your `hive-site.xml`, `core-site.xml` (for security configuration), - `hdfs-site.xml` (for HDFS configuration) file in `conf/`. Please note when running -the query on a YARN cluster (`cluster` mode), the `datanucleus` jars under the `lib` directory -and `hive-site.xml` under `conf/` directory need to be available on the driver and all executors launched by the -YARN cluster. The convenient way to do this is adding them through the `--jars` option and `--file` option of the -`spark-submit` command. - +`hdfs-site.xml` (for HDFS configuration) files in `conf/`. 
<div class="codetabs"> diff --git a/examples/pom.xml b/examples/pom.xml index b7f37978b945df0f04aa3f79c28bcb411d84bf72..4a20370f0668df247cc38d5919fe5d6b99d4f9e1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -27,13 +27,16 @@ <groupId>org.apache.spark</groupId> <artifactId>spark-examples_2.11</artifactId> - <properties> - <sbt.project.name>examples</sbt.project.name> - </properties> <packaging>jar</packaging> <name>Spark Project Examples</name> <url>http://spark.apache.org/</url> + <properties> + <sbt.project.name>examples</sbt.project.name> + <build.testJarPhase>none</build.testJarPhase> + <build.copyDependenciesPhase>package</build.copyDependenciesPhase> + </properties> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> @@ -75,23 +78,6 @@ <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId> <version>${project.version}</version> </dependency> - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase-testing-util</artifactId> - <version>${hbase.version}</version> - <scope>${hbase.deps.scope}</scope> - <exclusions> - <exclusion> - <!-- SPARK-4455 --> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase-annotations</artifactId> - </exclusion> - <exclusion> - <groupId>org.jruby</groupId> - <artifactId>jruby-complete</artifactId> - </exclusion> - </exclusions> - </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-protocol</artifactId> @@ -139,6 +125,10 @@ <groupId>org.apache.hbase</groupId> <artifactId>hbase-annotations</artifactId> </exclusion> + <exclusion> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-common</artifactId> + </exclusion> <exclusion> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> @@ -208,13 +198,6 @@ <version>${hbase.version}</version> <scope>${hbase.deps.scope}</scope> </dependency> - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase-hadoop-compat</artifactId> - <version>${hbase.version}</version> - <type>test-jar</type> - <scope>test</scope> - </dependency> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-math3</artifactId> @@ -294,17 +277,6 @@ <artifactId>scopt_${scala.binary.version}</artifactId> <version>3.3.0</version> </dependency> - - <!-- - The following dependencies are already present in the Spark assembly, so we want to force - them to be provided. 
- --> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-library</artifactId> - <scope>provided</scope> - </dependency> - </dependencies> <build> @@ -325,38 +297,6 @@ <skip>true</skip> </configuration> </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <executions> - <execution> - <id>prepare-test-jar</id> - <phase>none</phase> - <goals> - <goal>test-jar</goal> - </goals> - </execution> - </executions> - <configuration> - <outputDirectory>${jars.target.dir}</outputDirectory> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-dependency-plugin</artifactId> - <executions> - <execution> - <phase>package</phase> - <goals> - <goal>copy-dependencies</goal> - </goals> - <configuration> - <includeScope>runtime</includeScope> - <outputDirectory>${jars.target.dir}</outputDirectory> - </configuration> - </execution> - </executions> - </plugin> </plugins> </build> <profiles> diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index d02b2a499455e3ca27c7103e388607f6539a9acd..7a5e37c5016333440b18d5422f061d9f21faf1d7 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -144,10 +144,26 @@ abstract class AbstractCommandBuilder { boolean isTesting = "1".equals(getenv("SPARK_TESTING")); if (prependClasses || isTesting) { String scala = getScalaVersion(); - List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx", - "streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver", - "yarn", "launcher", - "common/network-common", "common/network-shuffle", "common/network-yarn"); + List<String> projects = Arrays.asList( + "common/network-common", + "common/network-shuffle", + "common/network-yarn", + "common/sketch", + "common/tags", + "common/unsafe", + "core", + "examples", + "graphx", + "launcher", + "mllib", + "repl", + "sql/catalyst", + "sql/core", + "sql/hive", + "sql/hive-thriftserver", + "streaming", + "yarn" + ); if (prependClasses) { if (!isTesting) { System.err.println( @@ -174,31 +190,12 @@ abstract class AbstractCommandBuilder { // Add Spark jars to the classpath. For the testing case, we rely on the test code to set and // propagate the test classpath appropriately. For normal invocation, look for the jars // directory under SPARK_HOME. - String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting); + boolean isTestingSql = "1".equals(getenv("SPARK_SQL_TESTING")); + String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting && !isTestingSql); if (jarsDir != null) { addToClassPath(cp, join(File.separator, jarsDir, "*")); } - // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only - // included in the uber jar as plugin.xml metadata is lost. 
Both sbt and maven will populate - // "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive - File libdir; - if (new File(sparkHome, "RELEASE").isFile()) { - libdir = new File(sparkHome, "lib"); - } else { - libdir = new File(sparkHome, "lib_managed/jars"); - } - - if (libdir.isDirectory()) { - for (File jar : libdir.listFiles()) { - if (jar.getName().startsWith("datanucleus-")) { - addToClassPath(cp, jar.getAbsolutePath()); - } - } - } else { - checkState(isTesting, "Library directory '%s' does not exist.", libdir.getAbsolutePath()); - } - addToClassPath(cp, getenv("HADOOP_CONF_DIR")); addToClassPath(cp, getenv("YARN_CONF_DIR")); addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH")); diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java index a08c8dcba402b3021254816588416742e718b0ab..91586aad7b7099b230f1e33903c9b4cd370cf0bd 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java +++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java @@ -358,12 +358,12 @@ class CommandBuilderUtils { // TODO: change to the correct directory once the assembly build is changed. File libdir; if (new File(sparkHome, "RELEASE").isFile()) { - libdir = new File(sparkHome, "lib"); + libdir = new File(sparkHome, "jars"); checkState(!failIfNotFound || libdir.isDirectory(), "Library directory '%s' does not exist.", libdir.getAbsolutePath()); } else { - libdir = new File(sparkHome, String.format("assembly/target/scala-%s", scalaVersion)); + libdir = new File(sparkHome, String.format("assembly/target/scala-%s/jars", scalaVersion)); if (!libdir.isDirectory()) { checkState(!failIfNotFound, "Library directory '%s' does not exist; make sure Spark is built.", diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 56e4107c5a0c75a5680c77cc1766edd099a547be..c31c42cd3a41e47065a9a7c8a4885a0d804cf8de 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -336,6 +336,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder { } private List<String> findExamplesJars() { + boolean isTesting = "1".equals(getenv("SPARK_TESTING")); List<String> examplesJars = new ArrayList<>(); String sparkHome = getSparkHome(); @@ -346,11 +347,15 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder { jarsDir = new File(sparkHome, String.format("examples/target/scala-%s/jars", getScalaVersion())); } - checkState(jarsDir.isDirectory(), "Examples jars directory '%s' does not exist.", + + boolean foundDir = jarsDir.isDirectory(); + checkState(isTesting || foundDir, "Examples jars directory '%s' does not exist.", jarsDir.getAbsolutePath()); - for (File f: jarsDir.listFiles()) { - examplesJars.add(f.getAbsolutePath()); + if (foundDir) { + for (File f: jarsDir.listFiles()) { + examplesJars.add(f.getAbsolutePath()); + } } return examplesJars; } diff --git a/pom.xml b/pom.xml index e135c92c078641505cdb70996dfdf69a9a33ef39..984b2859efbec2333bffadadf214f4074d10007b 100644 --- a/pom.xml +++ b/pom.xml @@ -185,6 +185,10 @@ <!-- Modules that copy jars to the build directory should do so under this location. 
--> <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir> + <!-- Allow modules to enable / disable certain build plugins easily. --> + <build.testJarPhase>prepare-package</build.testJarPhase> + <build.copyDependenciesPhase>none</build.copyDependenciesPhase> + <!-- Dependency scopes that can be overridden by enabling certain profiles. These profiles are declared in the projects that build assemblies. @@ -237,15 +241,6 @@ </pluginRepository> </pluginRepositories> <dependencies> - <!-- - This is a dummy dependency that is used along with the shading plug-in - to create effective poms on publishing (see SPARK-3812). - --> - <dependency> - <groupId>org.spark-project.spark</groupId> - <artifactId>unused</artifactId> - <version>1.0.0</version> - </dependency> <!-- This is needed by the scalatest plugin, and so is declared here to be available in all child modules, just as scalatest is run in all children @@ -833,6 +828,14 @@ </exclusion> </exclusions> </dependency> + <!-- avro-mapred for some reason depends on avro-ipc's test jar, so undo that. --> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-ipc</artifactId> + <classifier>tests</classifier> + <version>${avro.version}</version> + <scope>test</scope> + </dependency> <dependency> <groupId>org.apache.avro</groupId> <artifactId>avro-mapred</artifactId> @@ -1521,6 +1524,10 @@ <groupId>org.codehaus.groovy</groupId> <artifactId>groovy-all</artifactId> </exclusion> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> </exclusions> </dependency> @@ -1916,6 +1923,7 @@ --> <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH> <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES> + <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION> <SPARK_TESTING>1</SPARK_TESTING> <JAVA_HOME>${test.java.home}</JAVA_HOME> </environmentVariables> @@ -1964,6 +1972,7 @@ --> <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH> <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES> + <SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION> <SPARK_TESTING>1</SPARK_TESTING> <JAVA_HOME>${test.java.home}</JAVA_HOME> </environmentVariables> @@ -2146,6 +2155,7 @@ <version>2.10</version> <executions> <execution> + <id>generate-test-classpath</id> <phase>test-compile</phase> <goals> <goal>build-classpath</goal> @@ -2155,6 +2165,17 @@ <outputProperty>test_classpath</outputProperty> </configuration> </execution> + <execution> + <id>copy-module-dependencies</id> + <phase>${build.copyDependenciesPhase}</phase> + <goals> + <goal>copy-dependencies</goal> + </goals> + <configuration> + <includeScope>runtime</includeScope> + <outputDirectory>${jars.target.dir}</outputDirectory> + </configuration> + </execution> </executions> </plugin> @@ -2169,9 +2190,6 @@ <shadedArtifactAttached>false</shadedArtifactAttached> <artifactSet> <includes> - <!-- At a minimum we must include this to force effective pom generation --> - <include>org.spark-project.spark:unused</include> - <include>org.eclipse.jetty:jetty-io</include> <include>org.eclipse.jetty:jetty-http</include> <include>org.eclipse.jetty:jetty-continuation</include> @@ -2302,7 +2320,7 @@ <executions> <execution> <id>prepare-test-jar</id> - <phase>prepare-package</phase> + <phase>${build.testJarPhase}</phase> <goals> <goal>test-jar</goal> </goals> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5d62b688b9eaf6c51dea270f6853f8a99577c172..b32480b1646bffec231a5b671fd79b62f6fa10cc 
100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -57,11 +57,12 @@ object BuildCommons { Seq("yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl", "docker-integration-tests").map(ProjectRef(buildLocation, _)) - val assemblyProjects@Seq(assembly, networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKinesisAslAssembly) = - Seq("assembly", "network-yarn", "streaming-flume-assembly", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly") + val assemblyProjects@Seq(networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKinesisAslAssembly) = + Seq("network-yarn", "streaming-flume-assembly", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly") .map(ProjectRef(buildLocation, _)) - val copyJarsProjects@Seq(examples) = Seq("examples").map(ProjectRef(buildLocation, _)) + val copyJarsProjects@Seq(assembly, examples) = Seq("assembly", "examples") + .map(ProjectRef(buildLocation, _)) val tools = ProjectRef(buildLocation, "tools") // Root project. @@ -263,8 +264,14 @@ object SparkBuild extends PomBuild { /* Unsafe settings */ enable(Unsafe.settings)(unsafe) - /* Set up tasks to copy dependencies during packaging. */ - copyJarsProjects.foreach(enable(CopyDependencies.settings)) + /* + * Set up tasks to copy dependencies during packaging. This step can be disabled in the command + * line, so that dev/mima can run without trying to copy these files again and potentially + * causing issues. + */ + if (!"false".equals(System.getProperty("copyDependencies"))) { + copyJarsProjects.foreach(enable(CopyDependencies.settings)) + } /* Enable Assembly for all assembly projects */ assemblyProjects.foreach(enable(Assembly.settings)) @@ -477,8 +484,6 @@ object Assembly { val hadoopVersion = taskKey[String]("The version of hadoop that spark is compiled against.") - val deployDatanucleusJars = taskKey[Unit]("Deploy datanucleus jars to the spark/lib_managed/jars directory") - lazy val settings = assemblySettings ++ Seq( test in assembly := {}, hadoopVersion := { @@ -497,27 +502,13 @@ object Assembly { s"${mName}-test-${v}.jar" }, mergeStrategy in assembly := { - case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard case "log4j.properties" => MergeStrategy.discard case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines case "reference.conf" => MergeStrategy.concat case _ => MergeStrategy.first - }, - deployDatanucleusJars := { - val jars: Seq[File] = (fullClasspath in assembly).value.map(_.data) - .filter(_.getPath.contains("org.datanucleus")) - var libManagedJars = new File(BuildCommons.sparkHome, "lib_managed/jars") - libManagedJars.mkdirs() - jars.foreach { jar => - val dest = new File(libManagedJars, jar.getName) - if (!dest.exists()) { - Files.copy(jar.toPath, dest.toPath) - } - } - }, - assembly <<= assembly.dependsOn(deployDatanucleusJars) + } ) } @@ -698,6 +689,13 @@ object Java8TestSettings { object TestSettings { import BuildCommons._ + private val scalaBinaryVersion = + if (System.getProperty("scala-2.10") == "true") { + "2.10" + } else { + "2.11" + } + lazy val settings = Seq ( // Fork new JVMs for tests and set Java options for those fork := true, @@ -707,6 +705,7 @@ object TestSettings { "SPARK_DIST_CLASSPATH" -> (fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"), "SPARK_PREPEND_CLASSES" -> 
"1", + "SPARK_SCALA_VERSION" -> scalaBinaryVersion, "SPARK_TESTING" -> "1", "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))), javaOptions in Test += s"-Djava.io.tmpdir=$testTempDir", @@ -744,7 +743,7 @@ object TestSettings { // Make sure the test temp directory exists. resourceGenerators in Test <+= resourceManaged in Test map { outDir: File => if (!new File(testTempDir).isDirectory()) { - require(new File(testTempDir).mkdirs()) + require(new File(testTempDir).mkdirs(), s"Error creating temp directory $testTempDir.") } Seq[File]() }, diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py index d010c0e0080c4941f8477fbeb6ab51f82a6d4ac2..148bf7e8ff5ce5971e377e7afbe6abfa52a1fcb9 100644 --- a/python/pyspark/streaming/tests.py +++ b/python/pyspark/streaming/tests.py @@ -1482,7 +1482,7 @@ def search_kafka_assembly_jar(): raise Exception( ("Failed to find Spark Streaming kafka assembly jar in %s. " % kafka_assembly_dir) + "You need to build Spark with " - "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or " + "'build/sbt assembly/package streaming-kafka-assembly/assembly' or " "'build/mvn package' before running this test.") elif len(jars) > 1: raise Exception(("Found multiple Spark Streaming Kafka assembly JARs: %s; please " @@ -1548,7 +1548,7 @@ if __name__ == "__main__": elif are_kinesis_tests_enabled is False: sys.stderr.write("Skipping all Kinesis Python tests as the optional Kinesis project was " "not compiled into a JAR. To run these tests, " - "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/assembly " + "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/package " "streaming-kinesis-asl-assembly/assembly' or " "'build/mvn -Pkinesis-asl package' before running this test.") else: @@ -1556,7 +1556,7 @@ if __name__ == "__main__": ("Failed to find Spark Streaming Kinesis assembly jar in %s. " % kinesis_asl_assembly_dir) + "You need to build Spark with 'build/sbt -Pkinesis-asl " - "assembly/assembly streaming-kinesis-asl-assembly/assembly'" + "assembly/package streaming-kinesis-asl-assembly/assembly'" "or 'build/mvn -Pkinesis-asl package' before running this test.") sys.stderr.write("Running tests: %s \n" % (str(testcases))) diff --git a/python/run-tests.py b/python/run-tests.py index a9f8854e6f66aba9282e629d86128cec4a5b427e..38b3bb84c10bece8b7ac9e6455dec4eca89214c9 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -53,11 +53,25 @@ LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log") FAILURE_REPORTING_LOCK = Lock() LOGGER = logging.getLogger() +# Find out where the assembly jars are located. 
+for scala in ["2.11", "2.10"]: + build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala) + if os.path.isdir(build_dir): + SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*") + break +else: + raise Exception("Cannot find assembly build directory, please build Spark first.") + def run_individual_python_test(test_name, pyspark_python): env = dict(os.environ) - env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python), - 'PYSPARK_DRIVER_PYTHON': which(pyspark_python)}) + env.update({ + 'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH, + 'SPARK_TESTING': '1', + 'SPARK_PREPEND_CLASSES': '1', + 'PYSPARK_PYTHON': which(pyspark_python), + 'PYSPARK_DRIVER_PYTHON': which(pyspark_python) + }) LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name) start_time = time.time() try: diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 33af624cfda7806dbdd06b60e80a27d109d36479..2c7358e59a2d62d0f10a5d44f777a2a5e70e8cf2 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -763,11 +763,15 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl extraEnvironment = Map( // Disables SPARK_TESTING to exclude log4j.properties in test directories. "SPARK_TESTING" -> "0", + // But set SPARK_SQL_TESTING to make spark-class happy. + "SPARK_SQL_TESTING" -> "1", // Points SPARK_PID_DIR to SPARK_HOME, otherwise only 1 Thrift server instance can be // started at a time, which is not Jenkins friendly. 
"SPARK_PID_DIR" -> pidDir.getCanonicalPath), redirectStderr = true) + logInfo(s"COMMAND: $command") + logInfo(s"OUTPUT: $lines") lines.split("\n").collectFirst { case line if line.contains(LOG_FILE_MARK) => new File(line.drop(LOG_FILE_MARK.length)) }.getOrElse { diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 58efd80512a50482cb91f109e983e29e54819c36..61504becf1f38066d8764819ec93438e21d4b3cd 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -225,30 +225,6 @@ <argLine>-da -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine> </configuration> </plugin> - - <!-- Deploy datanucleus jars to the spark/lib_managed/jars directory --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-dependency-plugin</artifactId> - <executions> - <execution> - <id>copy-dependencies</id> - <phase>package</phase> - <goals> - <goal>copy-dependencies</goal> - </goals> - <configuration> - <!-- basedir is spark/sql/hive/ --> - <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory> - <overWriteReleases>false</overWriteReleases> - <overWriteSnapshots>false</overWriteSnapshots> - <overWriteIfNewer>true</overWriteIfNewer> - <includeGroupIds>org.datanucleus</includeGroupIds> - </configuration> - </execution> - </executions> - </plugin> - </plugins> </build> </project> diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 4dd3ccdf3723d1c3dc972de1fb634d3390b81d76..336e29fc6bfd3fc401b1b8f3a816056648525f06 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -447,9 +447,6 @@ private[spark] class Client( * * Note that the archive cannot be a "local" URI. If none of the above settings are found, * then upload all files found in $SPARK_HOME/jars. - * - * TODO: currently the code looks in $SPARK_HOME/lib while the work to replace assemblies - * with a directory full of jars is ongoing. */ val sparkArchive = sparkConf.get(SPARK_ARCHIVE) if (sparkArchive.isDefined) { diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index 2eaafa072a3aeb65c6b28a40aa81b9b230440020..74e268dc4847327920ab8b0f91d3771bea8d5a36 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -273,7 +273,7 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll test("distribute local spark jars") { val temp = Utils.createTempDir() - val jarsDir = new File(temp, "lib") + val jarsDir = new File(temp, "jars") assert(jarsDir.mkdir()) val jar = TestUtils.createJarWithFiles(Map(), jarsDir) new FileOutputStream(new File(temp, "RELEASE")).close()