From c3f5e075335a65ea522b2f76716921ec056c52ed Mon Sep 17 00:00:00 2001
From: Patrick Wendell <pwendell@gmail.com>
Date: Sun, 2 Mar 2014 15:18:19 -0800
Subject: [PATCH] SPARK-1121: Include avro for yarn-alpha builds

This lets us explicitly include Avro based on a profile for 0.23.X
builds. It makes me sad how convoluted it is to express this logic
in Maven. @tgraves and @sryza curious if this works for you.

I'm also considering just reverting to how it was before. The only
real problem was that Spark advertised a dependency on Avro
even though it only really depends transitively on Avro through
other deps.

Author: Patrick Wendell <pwendell@gmail.com>

Closes #49 from pwendell/avro-build-fix and squashes the following commits:

8d6ee92 [Patrick Wendell] SPARK-1121: Add avro to yarn-alpha profile
---
 bagel/pom.xml               | 14 ++++++++++++++
 core/pom.xml                | 14 ++++++++++++++
 docs/building-with-maven.md |  4 ----
 examples/pom.xml            | 14 ++++++++++++++
 external/flume/pom.xml      | 14 ++++++++++++++
 external/kafka/pom.xml      | 14 ++++++++++++++
 external/mqtt/pom.xml       | 14 ++++++++++++++
 external/twitter/pom.xml    | 14 ++++++++++++++
 external/zeromq/pom.xml     | 14 ++++++++++++++
 graphx/pom.xml              | 14 ++++++++++++++
 mllib/pom.xml               | 14 ++++++++++++++
 pom.xml                     | 15 +++++++++++++++
 repl/pom.xml                | 14 ++++++++++++++
 streaming/pom.xml           | 14 ++++++++++++++
 tools/pom.xml               | 14 ++++++++++++++
 yarn/alpha/pom.xml          | 14 ++++++++++++++
 yarn/pom.xml                | 17 +++++++++--------
 yarn/stable/pom.xml         | 14 ++++++++++++++
 18 files changed, 234 insertions(+), 12 deletions(-)

diff --git a/bagel/pom.xml b/bagel/pom.xml
index 355f437c5b..41aacbd88a 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project Bagel</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/core/pom.xml b/core/pom.xml
index 2afd250825..99c841472b 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -31,6 +31,20 @@
     <name>Spark Project Core</name>
     <url>http://spark.apache.org/</url>
 
+    <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+         a Hadoop 0.23.X issue -->
+    <profiles>
+      <profile>
+        <id>yarn-alpha</id>
+        <dependencies>
+           <dependency>
+             <groupId>org.apache.avro</groupId>
+             <artifactId>avro</artifactId>
+           </dependency>
+        </dependencies>
+      </profile>
+    </profiles>
+
     <dependencies>
         <dependency>
             <groupId>org.apache.hadoop</groupId>
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index 40cac8eb4f..ded1292688 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -76,7 +76,3 @@ The maven build includes support for building a Debian package containing the as
     $ mvn -Pdeb -DskipTests clean package
 
 The debian package can then be found under assembly/target. We added the short commit hash to the file name so that we can distinguish individual packages built for SNAPSHOT versions.
-
-## A note about Hadoop version 0.23.x
-
-For building spark with hadoop 0.23.x and also yarn, you will have to manually add a dependency on avro (org.apache.avro, avro, 1.7.4).
diff --git a/examples/pom.xml b/examples/pom.xml
index d952e2ca4e..3aba343f4c 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project Examples</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <repositories>
     <repository>
       <id>apache-repo</id>
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index bc00ab4573..8783aea3e4 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External Flume</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 979eb0ca62..79dc38f984 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External Kafka</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 2c476b402e..06c751df7f 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External MQTT</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <repositories>
     <repository>
       <id>mqtt-repo</id>
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index a443459594..37bb4fad64 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External Twitter</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index a40e55876e..65ec0e26da 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project External ZeroMQ</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 2b4d674221..5b54dd27ef 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project GraphX</name>
   <url>http://spark-project.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/mllib/pom.xml b/mllib/pom.xml
index c0e745dadb..760a2a85d5 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project ML Library</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/pom.xml b/pom.xml
index 7e04efaa9c..7e28d7c194 100644
--- a/pom.xml
+++ b/pom.xml
@@ -425,6 +425,21 @@
           </exclusion>
         </exclusions>
       </dependency>
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro</artifactId>
+        <version>1.7.4</version>
+        <exclusions>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
       <dependency>
         <groupId>net.java.dev.jets3t</groupId>
         <artifactId>jets3t</artifactId>
diff --git a/repl/pom.xml b/repl/pom.xml
index 3a6baf5c42..aa01a17602 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project REPL</name>
   <url>http://spark.apache.org/</url>
 
+  <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+       a Hadoop 0.23.X issue -->
+  <profiles>
+    <profile>
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <properties>
     <deb.install.path>/usr/share/spark</deb.install.path>
     <deb.user>root</deb.user>
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 1f3366e25f..91d6a1375a 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -31,6 +31,20 @@
   <name>Spark Project Streaming</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <repositories>
     <repository>
       <id>apache-repo</id>
diff --git a/tools/pom.xml b/tools/pom.xml
index 67f2d780f3..b8dd255d40 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -30,6 +30,20 @@
   <name>Spark Project Tools</name>
   <url>http://spark.apache.org/</url>
 
+  <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+       a Hadoop 0.23.X issue -->
+  <profiles>
+    <profile>
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/yarn/alpha/pom.xml b/yarn/alpha/pom.xml
index e076ca1d44..bfe12ecec0 100644
--- a/yarn/alpha/pom.xml
+++ b/yarn/alpha/pom.xml
@@ -24,6 +24,20 @@
     <relativePath>../pom.xml</relativePath>
   </parent>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-yarn-alpha_2.10</artifactId>
   <packaging>jar</packaging>
diff --git a/yarn/pom.xml b/yarn/pom.xml
index be51679292..35e31760c1 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -52,14 +52,6 @@
       <artifactId>hadoop-client</artifactId>
       <version>${yarn.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro-ipc</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
@@ -78,6 +70,15 @@
       <modules>
         <module>alpha</module>
       </modules>
+
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
     </profile>
 
     <profile>
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 0780f251b5..9d68603251 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -24,6 +24,20 @@
     <relativePath>../pom.xml</relativePath>
   </parent>
 
+  <profiles>
+    <profile>
+      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around
+           a Hadoop 0.23.X issue -->
+      <id>yarn-alpha</id>
+      <dependencies>
+         <dependency>
+           <groupId>org.apache.avro</groupId>
+           <artifactId>avro</artifactId>
+         </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-yarn_2.10</artifactId>
   <packaging>jar</packaging>
-- 
GitLab