Merge pull request #675 from c0s/assembly

Building spark assembly for further consumption of the Spark project with a deployed cluster

Merge pull request #675 from c0s/assembly
5584ebcb · Matei Zaharia · a73f3ee5 · f4d51481 · 5584ebcb · 5584ebcb
Commit 5584ebcb authored 12 years ago by Matei Zaharia
--- a/assembly/README
+++ b/assembly/README
+This is an assembly module for the Spark project.
+
+It creates a single tar.gz file that includes all needed dependencies of the project
+except for org.apache.hadoop.* jars, which are supposed to be available from the
+deployed Hadoop cluster.
+
+This module is off by default to avoid spending extra time on top of the repl-bin
+module. To activate it, specify the profile on the command line:
+  -Passembly
+
+In case you want to avoid building the time-expensive repl-bin module, which shades
+all the dependencies into a big flat jar, supplement the Maven command with
+  -DnoExpensive
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <!-- Assembly module: its build runs maven-assembly-plugin against
+       src/main/assembly/assembly.xml during the package phase. -->
+  <modelVersion>4.0.0</modelVersion>
+  <!-- Inherits from the root spark-parent POM located one directory up. -->
+  <parent>
+    <groupId>org.spark-project</groupId>
+    <artifactId>spark-parent</artifactId>
+    <version>0.8.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <!-- No <version> is declared here, so this module takes the version of
+       its parent. -->
+  <groupId>org.spark-project</groupId>
+  <artifactId>spark-assembly</artifactId>
+  <name>Spark Project Assembly</name>
+  <url>http://spark-project.org/</url>
+
+  <build>
+    <plugins>
+      <!-- Builds the distribution described by src/main/assembly/assembly.xml
+           whenever this module reaches the package phase. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>dist</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <configuration>
+              <descriptors>
+                <descriptor>src/main/assembly/assembly.xml</descriptor>
+              </descriptors>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <!-- Each Hadoop profile only sets classifier.name, which the
+         <dependencies> section of this POM uses as the classifier of the
+         Spark module artifacts it pulls in. -->
+    <!-- NOTE(review): no default for classifier.name is visible in this file;
+         presumably one of these profiles must be active (or the parent POM
+         supplies the property) - confirm. -->
+    <profile>
+      <id>hadoop1</id>
+      <properties>
+        <classifier.name>hadoop1</classifier.name>
+      </properties>
+    </profile>
+    <profile>
+      <id>hadoop2</id>
+      <properties>
+        <classifier.name>hadoop2</classifier.name>
+      </properties>
+    </profile>
+    <profile>
+      <id>hadoop2-yarn</id>
+      <properties>
+        <classifier.name>hadoop2-yarn</classifier.name>
+      </properties>
+    </profile>
+  </profiles>
+  <dependencies>
+    <!-- Sibling Spark modules bundled into the distribution. They share the
+         version of spark-parent, so ${project.version} (inherited by this
+         module) is used instead of repeating the literal version in every
+         entry. ${classifier.name} is set by the hadoop1 / hadoop2 /
+         hadoop2-yarn profiles of this POM. -->
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-core</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-bagel</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <!-- The javadoc and sources artifacts of spark-examples are pulled in as
+         well, in addition to the Hadoop-classified jar above. -->
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>javadoc</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-examples</artifactId>
+      <classifier>sources</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-repl</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.spark-project</groupId>
+      <artifactId>spark-streaming</artifactId>
+      <classifier>${classifier.name}</classifier>
+      <version>${project.version}</version>
+    </dependency>
+  </dependencies>
+</project>
\ No newline at end of file
--- a/assembly/src/main/assembly/assembly.xml
+++ b/assembly/src/main/assembly/assembly.xml
+<assembly>
+  <!-- Descriptor for the Spark distribution; referenced from the
+       maven-assembly-plugin execution in assembly/pom.xml. -->
+  <id>dist</id>
+  <!-- Produce both a tar.gz archive and an exploded directory. -->
+  <formats>
+    <format>tar.gz</format>
+    <format>dir</format>
+  </formats>
+  <!-- Place the content at the root of the archive instead of under a
+       top-level base directory. -->
+  <includeBaseDirectory>false</includeBaseDirectory>
+
+  <fileSets>
+    <!-- No <directory> is given, so this set is resolved against the
+         assembly module's own base directory: it ships the module README. -->
+    <fileSet>
+      <includes>
+        <include>README</include>
+      </includes>
+    </fileSet>
+    <!-- Static web UI resources taken from the core module's sources.
+         Directory values are kept on one line so the path does not carry
+         surrounding newlines and indentation as part of its text. -->
+    <fileSet>
+      <directory>${project.parent.basedir}/core/src/main/resources/spark/ui/static/</directory>
+      <outputDirectory>/ui-resources/spark/ui/static</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <!-- Everything under the top-level bin/ directory. -->
+    <fileSet>
+      <directory>${project.parent.basedir}/bin/</directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <!-- Launcher scripts that live in the repository root are also copied
+         into /bin. -->
+    <fileSet>
+      <directory>${project.parent.basedir}</directory>
+      <outputDirectory>/bin</outputDirectory>
+      <includes>
+        <include>run*</include>
+        <include>spark-shell*</include>
+        <include>spark-executor*</include>
+      </includes>
+    </fileSet>
+  </fileSets>
+
+  <dependencySets>
+    <!-- Spark's own module jars go to the root of the distribution. The
+         artifact of this module (spark-assembly) matches the include pattern
+         too, so it is excluded explicitly to keep it out of the package. -->
+    <dependencySet>
+      <includes>
+        <include>org.spark-project:*:jar</include>
+      </includes>
+      <excludes>
+        <exclude>org.spark-project:spark-assembly:jar</exclude>
+      </excludes>
+    </dependencySet>
+    <!-- Remaining runtime dependencies, transitive ones included, are copied
+         as jars into lib/. Hadoop jars are left out because they are expected
+         to come from the deployed Hadoop cluster; Spark's own jars are
+         handled by the set above. -->
+    <dependencySet>
+      <outputDirectory>lib</outputDirectory>
+      <useTransitiveDependencies>true</useTransitiveDependencies>
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+      <useProjectArtifact>false</useProjectArtifact>
+      <excludes>
+        <exclude>org.apache.hadoop:*:jar</exclude>
+        <exclude>org.spark-project:*:jar</exclude>
+      </excludes>
+    </dependencySet>
+  </dependencySets>
+
+</assembly>
--- a/pom.xml
+++ b/pom.xml
@@ -61,7 +61,6 @@
    <module>tools</module>
    <module>streaming</module>
    <module>repl</module>
-    <module>repl-bin</module>
  </modules>

  <properties>
@@ -627,5 +626,25 @@
        </dependencies>
      </dependencyManagement>
    </profile>
+    <profile>
+      <!-- Opt-in only: without an <activation> element this profile, and the
+           assembly module it adds to the reactor, is enabled solely by
+           passing -Passembly on the command line. -->
+      <id>assembly</id>
+      <modules>
+        <module>assembly</module>
+      </modules>
+    </profile>
+    <profile>
+      <!-- Active unless the noExpensive property is defined (for example via
+           -DnoExpensive); adds the repl-bin module to the build. -->
+      <id>expensive-modules</id>
+      <activation>
+        <property>
+          <name>!noExpensive</name>
+        </property>
+      </activation>
+      <modules>
+        <module>repl-bin</module>
+      </modules>
+    </profile>
  </profiles>
 </project>
--- a/spark-shell
+++ b/spark-shell
@@ -79,6 +79,7 @@ if [[ ! $? ]]; then
  saved_stty=""
 fi

+export SPARK_LAUNCH_WITH_SCALA=${SPARK_LAUNCH_WITH_SCALA:-1}
 $FWDIR/run $OPTIONS spark.repl.Main "$@"

 # record the exit status lest it be overwritten: