From c3f5e075335a65ea522b2f76716921ec056c52ed Mon Sep 17 00:00:00 2001 From: Patrick Wendell <pwendell@gmail.com> Date: Sun, 2 Mar 2014 15:18:19 -0800 Subject: [PATCH] SPARK-1121: Include avro for yarn-alpha builds This lets us explicitly include Avro based on a profile for 0.23.X builds. It makes me sad how convoluted it is to express this logic in Maven. @tgraves and @sryza curious if this works for you. I'm also considering just reverting to how it was before. The only real problem was that Spark advertised a dependency on Avro even though it only really depends transitively on Avro through other deps. Author: Patrick Wendell <pwendell@gmail.com> Closes #49 from pwendell/avro-build-fix and squashes the following commits: 8d6ee92 [Patrick Wendell] SPARK-1121: Add avro to yarn-alpha profile --- bagel/pom.xml | 14 ++++++++++++++ core/pom.xml | 14 ++++++++++++++ docs/building-with-maven.md | 4 ---- examples/pom.xml | 14 ++++++++++++++ external/flume/pom.xml | 14 ++++++++++++++ external/kafka/pom.xml | 14 ++++++++++++++ external/mqtt/pom.xml | 14 ++++++++++++++ external/twitter/pom.xml | 14 ++++++++++++++ external/zeromq/pom.xml | 14 ++++++++++++++ graphx/pom.xml | 14 ++++++++++++++ mllib/pom.xml | 14 ++++++++++++++ pom.xml | 15 +++++++++++++++ repl/pom.xml | 14 ++++++++++++++ streaming/pom.xml | 14 ++++++++++++++ tools/pom.xml | 14 ++++++++++++++ yarn/alpha/pom.xml | 14 ++++++++++++++ yarn/pom.xml | 17 +++++++++-------- yarn/stable/pom.xml | 14 ++++++++++++++ 18 files changed, 234 insertions(+), 12 deletions(-) diff --git a/bagel/pom.xml b/bagel/pom.xml index 355f437c5b..41aacbd88a 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project Bagel</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + 
<artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/core/pom.xml b/core/pom.xml index 2afd250825..99c841472b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project Core</name> <url>http://spark.apache.org/</url> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <profiles> + <profile> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md index 40cac8eb4f..ded1292688 100644 --- a/docs/building-with-maven.md +++ b/docs/building-with-maven.md @@ -76,7 +76,3 @@ The maven build includes support for building a Debian package containing the as $ mvn -Pdeb -DskipTests clean package The debian package can then be found under assembly/target. We added the short commit hash to the file name so that we can distinguish individual packages built for SNAPSHOT versions. - -## A note about Hadoop version 0.23.x - -For building spark with hadoop 0.23.x and also yarn, you will have to manually add a dependency on avro (org.apache.avro, avro, 1.7.4). 
diff --git a/examples/pom.xml b/examples/pom.xml index d952e2ca4e..3aba343f4c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project Examples</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <repositories> <repository> <id>apache-repo</id> diff --git a/external/flume/pom.xml b/external/flume/pom.xml index bc00ab4573..8783aea3e4 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project External Flume</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 979eb0ca62..79dc38f984 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project External Kafka</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 2c476b402e..06c751df7f 100644 --- a/external/mqtt/pom.xml +++ 
b/external/mqtt/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project External MQTT</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <repositories> <repository> <id>mqtt-repo</id> diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml index a443459594..37bb4fad64 100644 --- a/external/twitter/pom.xml +++ b/external/twitter/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project External Twitter</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml index a40e55876e..65ec0e26da 100644 --- a/external/zeromq/pom.xml +++ b/external/zeromq/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project External ZeroMQ</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/graphx/pom.xml b/graphx/pom.xml index 2b4d674221..5b54dd27ef 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project GraphX</name> <url>http://spark-project.org/</url> + 
<profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/mllib/pom.xml b/mllib/pom.xml index c0e745dadb..760a2a85d5 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project ML Library</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/pom.xml b/pom.xml index 7e04efaa9c..7e28d7c194 100644 --- a/pom.xml +++ b/pom.xml @@ -425,6 +425,21 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>1.7.4</version> + <exclusions> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + </exclusions> + </dependency> <dependency> <groupId>net.java.dev.jets3t</groupId> <artifactId>jets3t</artifactId> diff --git a/repl/pom.xml b/repl/pom.xml index 3a6baf5c42..aa01a17602 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project REPL</name> <url>http://spark.apache.org/</url> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <profiles> + <profile> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + 
<artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <properties> <deb.install.path>/usr/share/spark</deb.install.path> <deb.user>root</deb.user> diff --git a/streaming/pom.xml b/streaming/pom.xml index 1f3366e25f..91d6a1375a 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -31,6 +31,20 @@ <name>Spark Project Streaming</name> <url>http://spark.apache.org/</url> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <repositories> <repository> <id>apache-repo</id> diff --git a/tools/pom.xml b/tools/pom.xml index 67f2d780f3..b8dd255d40 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -30,6 +30,20 @@ <name>Spark Project Tools</name> <url>http://spark.apache.org/</url> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <profiles> + <profile> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> diff --git a/yarn/alpha/pom.xml b/yarn/alpha/pom.xml index e076ca1d44..bfe12ecec0 100644 --- a/yarn/alpha/pom.xml +++ b/yarn/alpha/pom.xml @@ -24,6 +24,20 @@ <relativePath>../pom.xml</relativePath> </parent> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <groupId>org.apache.spark</groupId> <artifactId>spark-yarn-alpha_2.10</artifactId> 
<packaging>jar</packaging> diff --git a/yarn/pom.xml b/yarn/pom.xml index be51679292..35e31760c1 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -52,14 +52,6 @@ <artifactId>hadoop-client</artifactId> <version>${yarn.version}</version> </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - </dependency> <dependency> <groupId>org.scalatest</groupId> <artifactId>scalatest_${scala.binary.version}</artifactId> @@ -78,6 +70,15 @@ <modules> <module>alpha</module> </modules> + + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> </profile> <profile> diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml index 0780f251b5..9d68603251 100644 --- a/yarn/stable/pom.xml +++ b/yarn/stable/pom.xml @@ -24,6 +24,20 @@ <relativePath>../pom.xml</relativePath> </parent> + <profiles> + <profile> + <!-- SPARK-1121: Adds an explicit dependency on Avro to work around + a Hadoop 0.23.X issue --> + <id>yarn-alpha</id> + <dependencies> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + </dependencies> + </profile> + </profiles> + <groupId>org.apache.spark</groupId> <artifactId>spark-yarn_2.10</artifactId> <packaging>jar</packaging> -- GitLab