From 666d93c294458cb056cb590eb11bb6cf979861e5 Mon Sep 17 00:00:00 2001 From: Matei Zaharia <matei@eecs.berkeley.edu> Date: Tue, 27 Aug 2013 19:23:54 -0700 Subject: [PATCH] Update Maven build to create assemblies expected by new scripts This includes the following changes: - The "assembly" package now builds in Maven by default, and creates an assembly containing both hadoop-client and Spark, unlike the old BigTop distribution assembly that skipped hadoop-client - There is now a bigtop-dist package to build the old BigTop assembly - The repl-bin package is no longer built by default since the scripts don't rely on it; instead it can be enabled with -Prepl-bin - Py4J is now included in the assembly/lib folder as a local Maven repo, so that the Maven package can link to it - run-example now adds the original Spark classpath as well because the Maven examples assembly lists spark-core and such as provided - The various Maven projects add a spark-yarn dependency correctly --- {core => assembly}/lib/PY4J_LICENSE.txt | 0 {core => assembly}/lib/PY4J_VERSION.txt | 0 .../lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar | Bin .../lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom | 9 ++ .../net/sf/py4j/py4j/maven-metadata-local.xml | 12 ++ assembly/pom.xml | 138 +++++++++++++++--- assembly/src/main/assembly/assembly.xml | 19 ++- examples/pom.xml | 62 +++++++- pom.xml | 16 +- project/SparkBuild.scala | 4 +- run-example | 9 +- 11 files changed, 222 insertions(+), 47 deletions(-) rename {core => assembly}/lib/PY4J_LICENSE.txt (100%) rename {core => assembly}/lib/PY4J_VERSION.txt (100%) rename core/lib/py4j0.7.jar => assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar (100%) create mode 100644 assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom create mode 100644 assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml diff --git a/core/lib/PY4J_LICENSE.txt b/assembly/lib/PY4J_LICENSE.txt similarity index 100% rename from core/lib/PY4J_LICENSE.txt rename to assembly/lib/PY4J_LICENSE.txt diff --git 
a/core/lib/PY4J_VERSION.txt b/assembly/lib/PY4J_VERSION.txt similarity index 100% rename from core/lib/PY4J_VERSION.txt rename to assembly/lib/PY4J_VERSION.txt diff --git a/core/lib/py4j0.7.jar b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar similarity index 100% rename from core/lib/py4j0.7.jar rename to assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom new file mode 100644 index 0000000000..1c730e19b4 --- /dev/null +++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <modelVersion>4.0.0</modelVersion> + <groupId>net.sf.py4j</groupId> + <artifactId>py4j</artifactId> + <version>0.7</version> + <description>POM was created from install:install-file</description> +</project> diff --git a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml new file mode 100644 index 0000000000..6942ff45e7 --- /dev/null +++ b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml @@ -0,0 +1,12 @@ +<?xml version="1.0" encoding="UTF-8"?> +<metadata> + <groupId>net.sf.py4j</groupId> + <artifactId>py4j</artifactId> + <versioning> + <release>0.7</release> + <versions> + <version>0.7</version> + </versions> + <lastUpdated>20130828020333</lastUpdated> + </versioning> +</metadata> diff --git a/assembly/pom.xml b/assembly/pom.xml index ca20ccadba..74990b6361 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -1,4 +1,21 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. 
See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + --> + <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> @@ -13,29 +30,13 @@ <name>Spark Project Assembly</name> <url>http://spark-project.org/</url> - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-assembly-plugin</artifactId> - <version>2.4</version> - <executions> - <execution> - <id>dist</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - <configuration> - <descriptors> - <descriptor>src/main/assembly/assembly.xml</descriptor> - </descriptors> - </configuration> - </execution> - </executions> - </plugin> - </plugins> - </build> + <repositories> + <!-- A repository in the local filesystem for the Py4J JAR, which is not in Maven central --> + <repository> + <id>lib</id> + <url>file://${project.basedir}/lib</url> + </repository> + </repositories> <dependencies> <dependency> @@ -63,5 +64,96 @@ <artifactId>spark-streaming</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>net.sf.py4j</groupId> + <artifactId>py4j</artifactId> + <version>0.7</version> + </dependency> 
</dependencies> + + <build> + <plugins> + <!-- Use the shade plugin to create a big JAR with all the dependencies --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <configuration> + <shadedArtifactAttached>false</shadedArtifactAttached> + <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</outputFile> + <artifactSet> + <includes> + <include>*:*</include> + </includes> + </artifactSet> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> + <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> + <resource>reference.conf</resource> + </transformer> + </transformers> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + + <profiles> + <profile> + <id>hadoop2-yarn</id> + <dependencies> + <dependency> + <groupId>org.spark-project</groupId> + <artifactId>spark-yarn</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + </profile> + <profile> + <id>bigtop-dist</id> + <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop + that contains Spark but not the Hadoop JARs it depends on. 
--> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-assembly-plugin</artifactId> + <version>2.4</version> + <executions> + <execution> + <id>dist</id> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + <configuration> + <descriptors> + <descriptor>src/main/assembly/assembly.xml</descriptor> + </descriptors> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> </project> diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml index 14485b7181..4543b52c93 100644 --- a/assembly/src/main/assembly/assembly.xml +++ b/assembly/src/main/assembly/assembly.xml @@ -1,3 +1,19 @@ +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. 
+ --> <assembly> <id>dist</id> <formats> @@ -36,7 +52,8 @@ </directory> <outputDirectory>/bin</outputDirectory> <includes> - <include>run*</include> + <include>run-example*</include> + <include>spark-class*</include> <include>spark-shell*</include> <include>spark-executor*</include> </includes> diff --git a/examples/pom.xml b/examples/pom.xml index d24bd404fa..687fbcca8f 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -36,21 +36,25 @@ <groupId>org.spark-project</groupId> <artifactId>spark-core</artifactId> <version>${project.version}</version> + <scope>provided</scope> </dependency> <dependency> <groupId>org.spark-project</groupId> <artifactId>spark-streaming</artifactId> <version>${project.version}</version> + <scope>provided</scope> </dependency> <dependency> <groupId>org.spark-project</groupId> <artifactId>spark-mllib</artifactId> <version>${project.version}</version> + <scope>provided</scope> </dependency> <dependency> <groupId>org.spark-project</groupId> <artifactId>spark-bagel</artifactId> <version>${project.version}</version> + <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hbase</groupId> @@ -67,10 +71,6 @@ </exclusion> </exclusions> </dependency> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-library</artifactId> - </dependency> <dependency> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-server</artifactId> @@ -126,13 +126,63 @@ </exclusions> </dependency> </dependencies> + + <profiles> + <profile> + <id>hadoop2-yarn</id> + <dependencies> + <dependency> + <groupId>org.spark-project</groupId> + <artifactId>spark-yarn</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + </dependencies> + </profile> + </profiles> + <build> <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> <plugins> <plugin> - <groupId>org.scalatest</groupId> - 
<artifactId>scalatest-maven-plugin</artifactId> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <configuration> + <shadedArtifactAttached>false</shadedArtifactAttached> + <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-assembly-${project.version}.jar</outputFile> + <artifactSet> + <includes> + <include>*:*</include> + </includes> + </artifactSet> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> + <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> + <resource>reference.conf</resource> + </transformer> + </transformers> + </configuration> + </execution> + </executions> </plugin> </plugins> </build> diff --git a/pom.xml b/pom.xml index ea9548359f..e2fd54a966 100644 --- a/pom.xml +++ b/pom.xml @@ -62,6 +62,7 @@ <module>tools</module> <module>streaming</module> <module>repl</module> + <module>assembly</module> </modules> <properties> @@ -75,7 +76,7 @@ <slf4j.version>1.7.2</slf4j.version> <log4j.version>1.2.17</log4j.version> <hadoop.version>1.0.4</hadoop.version> - <!-- <hadoop.version>2.0.0-mr1-cdh4.1.2</hadoop.version> --> + <hbase.version>0.94.6</hbase.version> <PermGen>64m</PermGen> <MaxPermGen>512m</MaxPermGen> @@ -743,21 +744,10 @@ </dependencyManagement> </profile> <profile> - <id>assembly</id> + <id>repl-bin</id> <activation> <activeByDefault>false</activeByDefault> </activation> - <modules> - <module>assembly</module> - </modules> - </profile> - <profile> - <id>expensive-modules</id> - <activation> - <property> - 
<name>!noExpensive</name> - </property> - </activation> <modules> <module>repl-bin</module> </modules> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 8797e65b8d..2e26812671 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -41,7 +41,7 @@ object SparkBuild extends Build { .dependsOn(core, bagel, mllib) dependsOn(maybeYarn: _*) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, bagel, streaming) + .dependsOn(core, mllib, bagel, streaming) dependsOn(maybeYarn: _*) lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) @@ -261,7 +261,7 @@ object SparkBuild extends Build { def yarnSettings = sharedSettings ++ Seq( name := "spark-yarn" - ) ++ extraYarnSettings ++ assemblySettings ++ extraAssemblySettings + ) ++ extraYarnSettings // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN). diff --git a/run-example b/run-example index e1b26257e1..ccd4356bdf 100755 --- a/run-example +++ b/run-example @@ -54,6 +54,11 @@ if [[ -z $SPARK_EXAMPLES_JAR ]]; then exit 1 fi +# Since the examples JAR ideally shouldn't include spark-core (that dependency should be +# "provided"), also add our standard Spark classpath, built using compute-classpath.sh. +CLASSPATH=`$FWDIR/bin/compute-classpath.sh` +CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH" + # Find java binary if [ -n "${JAVA_HOME}" ]; then RUNNER="${JAVA_HOME}/bin/java" @@ -68,9 +73,9 @@ fi if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then echo -n "Spark Command: " - echo "$RUNNER" -cp "$SPARK_EXAMPLES_JAR" "$@" + echo "$RUNNER" -cp "$CLASSPATH" "$@" echo "========================================" echo fi -exec "$RUNNER" -cp "$SPARK_EXAMPLES_JAR" "$@" +exec "$RUNNER" -cp "$CLASSPATH" "$@" -- GitLab