Skip to content
Snippets Groups Projects
  • Marcelo Vanzin's avatar
    24d7d2e4
    [SPARK-13579][BUILD] Stop building the main Spark assembly. · 24d7d2e4
    Marcelo Vanzin authored
    This change modifies the "assembly/" module to just copy needed
    dependencies to its build directory, and modifies the packaging
    script to pick those up (and remove duplicate jars packages in the
    examples module).
    
    I also made some minor adjustments to dependencies to remove some
    test jars from the final packaging, and remove jars that conflict with each
    other when packaged separately (e.g. servlet api).
    
    Also note that this change restores guava in applications' classpaths, even
    though it's still shaded inside Spark. This is now needed for the Hadoop
    libraries that are packaged with Spark, which now are not processed by
    the shade plugin.
    
    Author: Marcelo Vanzin <vanzin@cloudera.com>
    
    Closes #11796 from vanzin/SPARK-13579.
    24d7d2e4
    History
    [SPARK-13579][BUILD] Stop building the main Spark assembly.
    Marcelo Vanzin authored
    This change modifies the "assembly/" module to just copy needed
    dependencies to its build directory, and modifies the packaging
    script to pick those up (and remove duplicate jars packages in the
    examples module).
    
    I also made some minor adjustments to dependencies to remove some
    test jars from the final packaging, and remove jars that conflict with each
    other when packaged separately (e.g. servlet api).
    
    Also note that this change restores guava in applications' classpaths, even
    though it's still shaded inside Spark. This is now needed for the Hadoop
    libraries that are packaged with Spark, which now are not processed by
    the shade plugin.
    
    Author: Marcelo Vanzin <vanzin@cloudera.com>
    
    Closes #11796 from vanzin/SPARK-13579.
pom.xml 11.40 KiB
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ Licensed to the Apache Software Foundation (ASF) under one or more
  ~ contributor license agreements.  See the NOTICE file distributed with
  ~ this work for additional information regarding copyright ownership.
  ~ The ASF licenses this file to You under the Apache License, Version 2.0
  ~ (the "License"); you may not use this file except in compliance with
  ~ the License.  You may obtain a copy of the License at
  ~
  ~    http://www.apache.org/licenses/LICENSE-2.0
  ~
  ~ Unless required by applicable law or agreed to in writing, software
  ~ distributed under the License is distributed on an "AS IS" BASIS,
  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ~ See the License for the specific language governing permissions and
  ~ limitations under the License.
  -->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.11</artifactId>
    <version>2.0.0-SNAPSHOT</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <groupId>org.apache.spark</groupId>
  <artifactId>spark-examples_2.11</artifactId>
  <packaging>jar</packaging>
  <name>Spark Project Examples</name>
  <url>http://spark.apache.org/</url>

  <properties>
    <sbt.project.name>examples</sbt.project.name>
    <build.testJarPhase>none</build.testJarPhase>
    <build.copyDependenciesPhase>package</build.copyDependenciesPhase>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-mllib_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-graphx_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-flume_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-protocol</artifactId>
      <version>${hbase.version}</version>
      <scope>${hbase.deps.scope}</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>${hbase.version}</version>
      <scope>${hbase.deps.scope}</scope>
      <exclusions>
        <exclusion>
          <!-- SPARK-4455 -->
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-annotations</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
      <scope>${hbase.deps.scope}</scope>
      <exclusions>
        <exclusion>
          <!-- SPARK-4455 -->
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-annotations</artifactId>
        </exclusion>
       <exclusion>
        <groupId>io.netty</groupId>
        <artifactId>netty</artifactId>
       </exclusion>
     </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>${hbase.version}</version>
      <scope>${hbase.deps.scope}</scope>
      <exclusions>
        <exclusion>
          <!-- SPARK-4455 -->
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-annotations</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-common</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-core</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-core</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-auth</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-annotations</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-hadoop1-compat</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.commons</groupId>
          <artifactId>commons-math</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.sun.jersey</groupId>
          <artifactId>jersey-core</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-api</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.sun.jersey</groupId>
          <artifactId>jersey-server</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.sun.jersey</groupId>
          <artifactId>jersey-core</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.sun.jersey</groupId>
          <artifactId>jersey-json</artifactId>
        </exclusion>
        <exclusion>
          <!-- hbase uses v2.4, which is better, but ...-->
          <groupId>commons-io</groupId>
          <artifactId>commons-io</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-hadoop-compat</artifactId>
      <version>${hbase.version}</version>
      <scope>${hbase.deps.scope}</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-math3</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.twitter</groupId>
      <artifactId>algebird-core_${scala.binary.version}</artifactId>
      <version>0.11.0</version>
    </dependency>
    <dependency>
      <groupId>org.scalacheck</groupId>
      <artifactId>scalacheck_${scala.binary.version}</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.cassandra</groupId>
      <artifactId>cassandra-all</artifactId>
      <version>1.2.19</version>
      <exclusions>
        <exclusion>
          <groupId>com.google.guava</groupId>
          <artifactId>guava</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.googlecode.concurrentlinkedhashmap</groupId>
          <artifactId>concurrentlinkedhashmap-lru</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.ning</groupId>
          <artifactId>compress-lzf</artifactId>
        </exclusion>
        <exclusion>
          <groupId>commons-cli</groupId>
          <artifactId>commons-cli</artifactId>
        </exclusion>
        <exclusion>
          <groupId>commons-codec</groupId>
          <artifactId>commons-codec</artifactId>
        </exclusion>
        <exclusion>
          <groupId>commons-lang</groupId>
          <artifactId>commons-lang</artifactId>
        </exclusion>
        <exclusion>
          <groupId>commons-logging</groupId>
          <artifactId>commons-logging</artifactId>
        </exclusion>
        <exclusion>
          <groupId>io.netty</groupId>
          <artifactId>netty</artifactId>
        </exclusion>
        <exclusion>
          <groupId>jline</groupId>
          <artifactId>jline</artifactId>
        </exclusion>
        <exclusion>
          <groupId>net.jpountz.lz4</groupId>
          <artifactId>lz4</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.cassandra.deps</groupId>
          <artifactId>avro</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.commons</groupId>
          <artifactId>commons-math3</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.thrift</groupId>
          <artifactId>libthrift</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>com.github.scopt</groupId>
      <artifactId>scopt_${scala.binary.version}</artifactId>
      <version>3.3.0</version>
    </dependency>
  </dependencies>

  <build>
    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-deploy-plugin</artifactId>
        <configuration>
          <skip>true</skip>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-install-plugin</artifactId>
        <configuration>
          <skip>true</skip>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <profiles>
    <profile>
      <id>kinesis-asl</id>
      <dependencies>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
          <version>${project.version}</version>
        </dependency>
      </dependencies>
    </profile>

    <!-- Profiles that disable inclusion of certain dependencies. -->
    <profile>
      <id>flume-provided</id>
      <properties>
        <flume.deps.scope>provided</flume.deps.scope>
      </properties>
    </profile>
    <profile>
      <id>hadoop-provided</id>
      <properties>
        <hadoop.deps.scope>provided</hadoop.deps.scope>
      </properties>
    </profile>
    <profile>
      <id>hbase-provided</id>
      <properties>
        <hbase.deps.scope>provided</hbase.deps.scope>
      </properties>
    </profile>
    <profile>
      <id>hive-provided</id>
      <properties>
        <hive.deps.scope>provided</hive.deps.scope>
      </properties>
    </profile>
    <profile>
      <id>parquet-provided</id>
      <properties>
        <parquet.deps.scope>provided</parquet.deps.scope>
      </properties>
    </profile>
  </profiles>
</project>