Skip to content
Snippets Groups Projects
Commit 5584ebcb authored by Matei Zaharia's avatar Matei Zaharia
Browse files

Merge pull request #675 from c0s/assembly

Building spark assembly for further consumption of the Spark project with a deployed cluster
parents a73f3ee5 f4d51481
No related branches found
No related tags found
No related merge requests found
This is an assembly module for the Spark project.
It creates a single tar.gz file that includes all needed dependencies of the project
except for the org.apache.hadoop.* jars, which are supposed to be available from the
deployed Hadoop cluster.
This module is off by default to avoid spending extra build time on top of the
repl-bin module. To activate it, specify the profile on the command line:
-Passembly
If you want to avoid building the time-expensive repl-bin module, which shades
all the dependencies into one big flat jar, supplement the Maven command with:
-DnoExpensive
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spark assembly module.

  Runs the maven-assembly-plugin "single" goal during the package phase, using
  the descriptor in src/main/assembly/assembly.xml, and depends on the Spark
  module jars that the descriptor collects.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>org.spark-project</groupId>
    <artifactId>spark-parent</artifactId>
    <version>0.8.0-SNAPSHOT</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <!-- No <version> is declared here, so the module inherits the parent's version. -->
  <groupId>org.spark-project</groupId>
  <artifactId>spark-assembly</artifactId>
  <name>Spark Project Assembly</name>
  <url>http://spark-project.org/</url>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.4</version>
        <executions>
          <!-- Build the distribution described by assembly.xml as part of "package". -->
          <execution>
            <id>dist</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
            <configuration>
              <descriptors>
                <descriptor>src/main/assembly/assembly.xml</descriptor>
              </descriptors>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <!--
    Each profile only sets classifier.name, which selects the classifier of the
    Spark module jars listed under <dependencies>.
    NOTE(review): none of these profiles declares an activation in this POM, so
    classifier.name is defined only when one of them is selected explicitly;
    confirm the build is always invoked with one of them.
  -->
  <profiles>
    <profile>
      <id>hadoop1</id>
      <properties>
        <classifier.name>hadoop1</classifier.name>
      </properties>
    </profile>
    <profile>
      <id>hadoop2</id>
      <properties>
        <classifier.name>hadoop2</classifier.name>
      </properties>
    </profile>
    <profile>
      <id>hadoop2-yarn</id>
      <properties>
        <classifier.name>hadoop2-yarn</classifier.name>
      </properties>
    </profile>
  </profiles>

  <!--
    Sibling Spark modules. ${project.version} is used instead of a repeated
    literal so these entries always follow the version inherited from the
    parent POM.
  -->
  <dependencies>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-core</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-bagel</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-examples</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>${project.version}</version>
    </dependency>
    <!-- The examples module is additionally pulled in with its javadoc and sources jars. -->
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-examples</artifactId>
      <classifier>javadoc</classifier>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-examples</artifactId>
      <classifier>sources</classifier>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-repl</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.spark-project</groupId>
      <artifactId>spark-streaming</artifactId>
      <classifier>${classifier.name}</classifier>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Assembly descriptor "dist": produces a tar.gz archive and an exploded
  directory containing the README, the web UI static resources, the launcher
  scripts and the collected jars.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
  <id>dist</id>
  <formats>
    <format>tar.gz</format>
    <format>dir</format>
  </formats>
  <includeBaseDirectory>false</includeBaseDirectory>

  <fileSets>
    <!-- README taken from this module's own directory (no <directory> given). -->
    <fileSet>
      <includes>
        <include>README</include>
      </includes>
    </fileSet>
    <!-- Static web UI resources copied from the core module's sources. -->
    <fileSet>
      <directory>${project.parent.basedir}/core/src/main/resources/spark/ui/static/</directory>
      <outputDirectory>/ui-resources/spark/ui/static</outputDirectory>
      <includes>
        <include>**/*</include>
      </includes>
    </fileSet>
    <!-- Everything under the parent project's bin/ directory. -->
    <fileSet>
      <directory>${project.parent.basedir}/bin/</directory>
      <outputDirectory>/bin</outputDirectory>
      <includes>
        <include>**/*</include>
      </includes>
    </fileSet>
    <!-- Launcher scripts that live in the parent project's root directory. -->
    <fileSet>
      <directory>${project.parent.basedir}</directory>
      <outputDirectory>/bin</outputDirectory>
      <includes>
        <include>run*</include>
        <include>spark-shell*</include>
        <include>spark-executor*</include>
      </includes>
    </fileSet>
  </fileSets>

  <dependencySets>
    <!--
      Spark's own jars go to the archive root. The exclude names this module's
      artifact (spark-assembly) so that the assembly module's own jar is not
      packaged into the distribution; the previous pattern, spark-dist, did
      not correspond to this module's artifactId.
    -->
    <dependencySet>
      <includes>
        <include>org.spark-project:*:jar</include>
      </includes>
      <excludes>
        <exclude>org.spark-project:spark-assembly:jar</exclude>
      </excludes>
    </dependencySet>
    <!--
      Remaining runtime dependencies (including transitive ones) go to lib/,
      except the Hadoop jars and the Spark jars handled above.
    -->
    <dependencySet>
      <outputDirectory>lib</outputDirectory>
      <useTransitiveDependencies>true</useTransitiveDependencies>
      <unpack>false</unpack>
      <scope>runtime</scope>
      <useProjectArtifact>false</useProjectArtifact>
      <excludes>
        <exclude>org.apache.hadoop:*:jar</exclude>
        <exclude>org.spark-project:*:jar</exclude>
      </excludes>
    </dependencySet>
  </dependencySets>
</assembly>
......@@ -61,7 +61,6 @@
<module>tools</module>
<module>streaming</module>
<module>repl</module>
<module>repl-bin</module>
</modules>
<properties>
......@@ -627,5 +626,25 @@
</dependencies>
</dependencyManagement>
</profile>
<!-- Opt-in profile (select it with -Passembly): adds the "assembly" module to the build. -->
<profile>
<id>assembly</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<modules>
<module>assembly</module>
</modules>
</profile>
<!--
Adds the "repl-bin" module to the build. The "!noExpensive" activation keeps
this profile active unless the noExpensive property is defined, e.g. by
passing -DnoExpensive on the command line.
-->
<profile>
<id>expensive-modules</id>
<activation>
<property>
<name>!noExpensive</name>
</property>
</activation>
<modules>
<module>repl-bin</module>
</modules>
</profile>
</profiles>
</project>
......@@ -79,6 +79,7 @@ if [[ ! $? ]]; then
saved_stty=""
fi
export SPARK_LAUNCH_WITH_SCALA=${SPARK_LAUNCH_WITH_SCALA:-1}
$FWDIR/run $OPTIONS spark.repl.Main "$@"
# record the exit status lest it be overwritten:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment