From 030f2c2126d5075576cd6d83a1ee7462c48b953b Mon Sep 17 00:00:00 2001
From: witgo <witgo@qq.com>
Date: Mon, 28 Apr 2014 22:50:51 -0700
Subject: [PATCH] Improved build configuration

1, Fix SPARK-1441: compile spark core error with hadoop 0.23.x
2, Fix SPARK-1491: maven hadoop-provided profile fails to build
3, Fix inconsistent dependency versions for org.scala-lang:* and org.apache.avro:*
4, Reformat sql/catalyst/pom.xml, sql/hive/pom.xml and sql/core/pom.xml (four-space indentation changed to two spaces)

Author: witgo <witgo@qq.com>

Closes #480 from witgo/format_pom and squashes the following commits:

03f652f [witgo] review commit
b452680 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom
bee920d [witgo] revert fix SPARK-1629: Spark Core missing commons-lang dependence
7382a07 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom
6902c91 [witgo] fix SPARK-1629: Spark Core missing commons-lang dependence
0da4bc3 [witgo] merge master
d1718ed [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom
e345919 [witgo] add avro dependency to yarn-alpha
77fad08 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom
62d0862 [witgo] Fix org.scala-lang: * inconsistent versions dependency
1a162d7 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom
934f24d [witgo] review commit
cf46edc [witgo] exclude jruby
06e7328 [witgo] Merge branch 'SparkBuild' into format_pom
99464d2 [witgo] fix maven hadoop-provided profile fails to build
0c6c1fc [witgo] Fix compile spark core error with hadoop 0.23.x
6851bec [witgo] Maintain consistent SparkBuild.scala, pom.xml
---
 bagel/pom.xml               |  14 ---
 core/pom.xml                |  22 -----
 docs/building-with-maven.md |  10 +-
 examples/pom.xml            |  18 +---
 external/flume/pom.xml      |  14 ---
 external/kafka/pom.xml      |  14 ---
 external/mqtt/pom.xml       |  14 ---
 external/twitter/pom.xml    |  14 ---
 external/zeromq/pom.xml     |  14 ---
 graphx/pom.xml              |  14 ---
 make-distribution.sh        |  15 ++-
 mllib/pom.xml               |  14 ---
 pom.xml                     |  79 +++++++++++++--
 project/SparkBuild.scala    |   9 +-
 repl/pom.xml                |  14 ---
 sql/catalyst/pom.xml        | 109 ++++++++++-----------
 sql/core/pom.xml            | 122 +++++++++++------------
 sql/hive/pom.xml            | 186 +++++++++++++++++-------------------
 streaming/pom.xml           |  14 ---
 tools/pom.xml               |  14 ---
 yarn/alpha/pom.xml          |  14 ---
 yarn/pom.xml                |   9 --
 yarn/stable/pom.xml         |  14 ---
 23 files changed, 295 insertions(+), 466 deletions(-)

diff --git a/bagel/pom.xml b/bagel/pom.xml
index 142f75c5d2..355f437c5b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project Bagel</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/core/pom.xml b/core/pom.xml
index 058b7acba7..73f573a414 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -30,19 +30,6 @@
   <packaging>jar</packaging>
   <name>Spark Project Core</name>
   <url>http://spark.apache.org/</url>
-  <!-- SPARK-1121: Adds an explicit dependency on Avro to work around a Hadoop 0.23.X issue -->
-  <profiles>
-    <profile>
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
@@ -147,15 +134,6 @@
       <groupId>org.json4s</groupId>
       <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
       <version>3.2.6</version>
-      <!-- see also exclusion for lift-json; this is necessary since it depends on
-         scala-library and scalap 2.10.0, but we use 2.10.4, and only override
-         scala-library -->
-      <exclusions>
-        <exclusion>
-        <groupId>org.scala-lang</groupId>
-        <artifactId>scalap</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <dependency>
       <groupId>colt</groupId>
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index 771d1933a1..a5e5303467 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -39,7 +39,10 @@ For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop versions wit
     # Cloudera CDH 4.2.0 with MapReduce v1
     $ mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package
 
-For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, you should enable the "yarn-alpha" or "yarn" profile and set the "hadoop.version", "yarn.version" property:
+    # Apache Hadoop 0.23.x
+    $ mvn -Phadoop-0.23 -Dhadoop.version=0.23.7 -DskipTests clean package
+
+For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, you can enable the "yarn-alpha" or "yarn" profile and set the "hadoop.version" and "yarn.version" properties. Note that Hadoop 0.23.x additionally requires the `-Phadoop-0.23` profile:
 
     # Apache Hadoop 2.0.5-alpha
     $ mvn -Pyarn-alpha -Dhadoop.version=2.0.5-alpha -Dyarn.version=2.0.5-alpha -DskipTests clean package
@@ -47,9 +50,12 @@ For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with
     # Cloudera CDH 4.2.0 with MapReduce v2
     $ mvn -Pyarn-alpha -Dhadoop.version=2.0.0-cdh4.2.0 -Dyarn.version=2.0.0-cdh4.2.0 -DskipTests clean package
 
-    # Apache Hadoop 2.2.X ( e.g. 2.2.0 as below ) and newer
+    # Apache Hadoop 2.2.X (e.g. 2.2.0 as below) and newer
     $ mvn -Pyarn -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -DskipTests clean package
 
+    # Apache Hadoop 0.23.x
+    $ mvn -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.7 -Dyarn.version=0.23.7 -DskipTests clean package
+
 ## Spark Tests in Maven ##
 
 Tests are run by default via the [ScalaTest Maven plugin](http://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin). Some of the require Spark to be packaged first, so always run `mvn package` with `-DskipTests` the first time. You can then run the tests with `mvn -Dhadoop.version=... test`.
diff --git a/examples/pom.xml b/examples/pom.xml
index 704d6df7c5..a2d1b19736 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project Examples</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -124,6 +110,10 @@
           <groupId>commons-logging</groupId>
           <artifactId>commons-logging</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.jruby</groupId>
+          <artifactId>jruby-complete</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index b84ca0c5e1..03d3b2394f 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External Flume</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 343e1fabd8..979eb0ca62 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External Kafka</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 3710a63541..9aa1c1a9f5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External MQTT</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-         <dependency>
-           <groupId>org.apache.avro</groupId>
-           <artifactId>avro</artifactId>
-         </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 398b9f4fba..a443459594 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External Twitter</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 77e957f404..a40e55876e 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External ZeroMQ</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/graphx/pom.xml b/graphx/pom.xml
index b4c67ddcd8..dc108d2fe7 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project GraphX</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/make-distribution.sh b/make-distribution.sh
index 4ac80efae0..c05dcd89d9 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -123,10 +123,19 @@ else
 fi
 
 if [ "$SPARK_YARN" == "true" ]; then
-  mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
-    -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  if [[ "$SPARK_HADOOP_VERSION" == 0.23.* ]]; then  # anchored prefix match, not a substring match
+    mvn clean package -DskipTests -Pyarn-alpha -Dhadoop.version=$SPARK_HADOOP_VERSION \
+      -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE -Phadoop-0.23
+  else
+    mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
+      -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  fi
 else
-  mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  if [[ "$SPARK_HADOOP_VERSION" == 0.23.* ]]; then  # anchored prefix match, not a substring match
+    mvn clean package -Phadoop-0.23 -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  else
+    mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  fi
 fi
 
 # Make directories
diff --git a/mllib/pom.xml b/mllib/pom.xml
index e7ce00efc4..cdd33dbb79 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project ML Library</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/pom.xml b/pom.xml
index e911799825..646753fe30 100644
--- a/pom.xml
+++ b/pom.xml
@@ -127,6 +127,7 @@
     <jetty.version>8.1.14.v20131031</jetty.version>
     <chill.version>0.3.6</chill.version>
     <codahale.metrics.version>3.0.0</codahale.metrics.version>
+    <avro.version>1.7.4</avro.version>
 
     <PermGen>64m</PermGen>
     <MaxPermGen>512m</MaxPermGen>
@@ -293,7 +294,7 @@
           <exclusion>
             <groupId>org.ow2.asm</groupId>
             <artifactId>asm-commons</artifactId>
-         </exclusion>
+          </exclusion>
         </exclusions>
       </dependency>
       <dependency>
@@ -308,7 +309,7 @@
           <exclusion>
             <groupId>org.ow2.asm</groupId>
             <artifactId>asm-commons</artifactId>
-         </exclusion>
+          </exclusion>
         </exclusions>
       </dependency>
       <dependency>
@@ -425,6 +426,16 @@
         <artifactId>scala-library</artifactId>
         <version>${scala.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scala-actors</artifactId>
+        <version>${scala.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.scala-lang</groupId>
+        <artifactId>scalap</artifactId>
+        <version>${scala.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.scalatest</groupId>
         <artifactId>scalatest_${scala.binary.version}</artifactId>
@@ -492,7 +503,45 @@
       <dependency>
         <groupId>org.apache.avro</groupId>
         <artifactId>avro</artifactId>
-        <version>1.7.4</version>
+        <version>${avro.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-ipc</artifactId>
+        <version>${avro.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty-util</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.velocity</groupId>
+            <artifactId>velocity</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-mapred</artifactId>
+        <version>${avro.version}</version>
         <exclusions>
           <exclusion>
             <groupId>org.jboss.netty</groupId>
@@ -687,7 +736,7 @@
             <junitxml>.</junitxml>
             <filereports>${project.build.directory}/SparkTestSuite.txt</filereports>
             <argLine>-Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
-            <stderr />
+            <stderr/>
           </configuration>
           <executions>
             <execution>
@@ -742,7 +791,7 @@
         <artifactId>build-helper-maven-plugin</artifactId>
         <executions>
           <execution>
-           <id>add-scala-sources</id>
+            <id>add-scala-sources</id>
             <phase>generate-sources</phase>
             <goals>
               <goal>add-source</goal>
@@ -779,6 +828,17 @@
   </build>
 
   <profiles>
+    <!-- SPARK-1121: Adds an explicit dependency on Avro to work around a Hadoop 0.23.X issue -->
+    <profile>
+      <id>hadoop-0.23</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </dependency>
+      </dependencies>
+    </profile>
+
     <profile>
       <id>yarn-alpha</id>
       <properties>
@@ -790,6 +850,12 @@
       <modules>
         <module>yarn</module>
       </modules>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </dependency>
+      </dependencies>
     </profile>
 
     <!-- Ganglia integration is not included by default due to LGPL-licensed code -->
@@ -838,7 +904,7 @@
 
     </profile>
 
-   <!-- Build without Hadoop dependencies that are included in some runtime environments. -->
+    <!-- Build without Hadoop dependencies that are included in some runtime environments. -->
     <profile>
       <id>hadoop-provided</id>
       <activation>
@@ -878,6 +944,7 @@
         <dependency>
           <groupId>org.apache.zookeeper</groupId>
           <artifactId>zookeeper</artifactId>
+          <version>3.4.5</version>
           <scope>provided</scope>
         </dependency>
       </dependencies>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 5adfbe373c..a9504f3926 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -106,7 +106,7 @@ object SparkBuild extends Build {
     case Some(v) => v.toBoolean
   }
   lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
-  val maybeAvro = if (hadoopVersion.startsWith("0.23.") && isYarnEnabled) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
+  val maybeAvro = if (hadoopVersion.startsWith("0.23.")) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
 
   lazy val isHiveEnabled = Properties.envOrNone("SPARK_HIVE") match {
     case None => DEFAULT_HIVE
@@ -224,7 +224,7 @@ object SparkBuild extends Build {
       <parent>
         <groupId>org.apache</groupId>
         <artifactId>apache</artifactId>
-        <version>13</version>
+        <version>14</version>
       </parent>
       <url>http://spark.apache.org/</url>
       <licenses>
@@ -250,7 +250,7 @@ object SparkBuild extends Build {
       </developers>
       <issueManagement>
         <system>JIRA</system>
-        <url>https://spark-project.atlassian.net/browse/SPARK</url>
+        <url>https://issues.apache.org/jira/browse/SPARK</url>
       </issueManagement>
     ),
 
@@ -313,6 +313,7 @@ object SparkBuild extends Build {
   val excludeCurator = ExclusionRule(organization = "org.apache.curator")
   val excludePowermock = ExclusionRule(organization = "org.powermock")
   val excludeFastutil = ExclusionRule(organization = "it.unimi.dsi")
+  val excludeJruby = ExclusionRule(organization = "org.jruby")
   val excludeThrift = ExclusionRule(organization = "org.apache.thrift")
 
   def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark",
@@ -417,7 +418,7 @@ object SparkBuild extends Build {
       v => "spark-examples-" + v + "-hadoop" + hadoopVersion + ".jar" },
     libraryDependencies ++= Seq(
       "com.twitter"          %% "algebird-core"   % "0.1.11",
-      "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
+      "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging, excludeJruby),
       "org.apache.cassandra" % "cassandra-all" % "1.2.6"
         exclude("com.google.guava", "guava")
         exclude("com.googlecode.concurrentlinkedhashmap", "concurrentlinkedhashmap-lru")
diff --git a/repl/pom.xml b/repl/pom.xml
index 78d2fe13c2..b761a176ce 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project REPL</name>
   <url>http://spark.apache.org/</url>
 
-  <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-       a Hadoop 0.23.X issue -->
-  <profiles>
-    <profile>
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <properties>
     <deb.install.path>/usr/share/spark</deb.install.path>
     <deb.user>root</deb.user>
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 9d5c6a857b..8d2e4baf69 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -16,67 +16,56 @@
   ~ limitations under the License.
   -->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-    <parent>
-        <groupId>org.apache.spark</groupId>
-        <artifactId>spark-parent</artifactId>
-        <version>1.0.0-SNAPSHOT</version>
-        <relativePath>../../pom.xml</relativePath>
-    </parent>
-
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-catalyst_2.10</artifactId>
-    <packaging>jar</packaging>
-    <name>Spark Project Catalyst</name>
-    <url>http://spark.apache.org/</url>
+    <artifactId>spark-parent</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
 
-    <profiles>
-      <profile>
-        <id>yarn-alpha</id>
-        <dependencies>
-          <dependency>
-            <groupId>org.apache.avro</groupId>
-            <artifactId>avro</artifactId>
-          </dependency>
-        </dependencies>
-      </profile>
-    </profiles>
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-catalyst_2.10</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Catalyst</name>
+  <url>http://spark.apache.org/</url>
 
-    <dependencies>
-        <dependency>
-            <groupId>org.scala-lang</groupId>
-            <artifactId>scala-reflect</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_${scala.binary.version}</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.typesafe</groupId>
-            <artifactId>scalalogging-slf4j_${scala.binary.version}</artifactId>
-            <version>1.0.1</version>
-        </dependency>
-        <dependency>
-            <groupId>org.scalatest</groupId>
-            <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.scalacheck</groupId>
-            <artifactId>scalacheck_${scala.binary.version}</artifactId>
-            <scope>test</scope>
-        </dependency>
-    </dependencies>
-    <build>
-        <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
-        <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-        <plugins>
-            <plugin>
-                <groupId>org.scalatest</groupId>
-                <artifactId>scalatest-maven-plugin</artifactId>
-            </plugin>
-        </plugins>
-    </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-reflect</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.typesafe</groupId>
+      <artifactId>scalalogging-slf4j_${scala.binary.version}</artifactId>
+      <version>1.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.scalatest</groupId>
+        <artifactId>scalatest-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 </project>
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 85580ed6b8..fb3b190b4e 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -16,72 +16,62 @@
   ~ limitations under the License.
   -->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-    <parent>
-        <groupId>org.apache.spark</groupId>
-        <artifactId>spark-parent</artifactId>
-        <version>1.0.0-SNAPSHOT</version>
-        <relativePath>../../pom.xml</relativePath>
-    </parent>
-
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-sql_2.10</artifactId>
-    <packaging>jar</packaging>
-    <name>Spark Project SQL</name>
-    <url>http://spark.apache.org/</url>
-    <profiles>
-      <profile>
-        <id>yarn-alpha</id>
-        <dependencies>
-          <dependency>
-            <groupId>org.apache.avro</groupId>
-            <artifactId>avro</artifactId>
-          </dependency>
-        </dependencies>
-      </profile>
-    </profiles>
+    <artifactId>spark-parent</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-sql_2.10</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project SQL</name>
+  <url>http://spark.apache.org/</url>
 
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_${scala.binary.version}</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.twitter</groupId>
-            <artifactId>parquet-column</artifactId>
-            <version>${parquet.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.twitter</groupId>
-            <artifactId>parquet-hadoop</artifactId>
-            <version>${parquet.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.scalatest</groupId>
-            <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.scalacheck</groupId>
-            <artifactId>scalacheck_${scala.binary.version}</artifactId>
-            <scope>test</scope>
-        </dependency>
-    </dependencies>
-    <build>
-        <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
-        <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-        <plugins>
-            <plugin>
-                <groupId>org.scalatest</groupId>
-                <artifactId>scalatest-maven-plugin</artifactId>
-            </plugin>
-        </plugins>
-    </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.twitter</groupId>
+      <artifactId>parquet-column</artifactId>
+      <version>${parquet.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.twitter</groupId>
+      <artifactId>parquet-hadoop</artifactId>
+      <version>${parquet.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.scalatest</groupId>
+        <artifactId>scalatest-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 </project>
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index a662da76ce..889d249146 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -16,105 +16,95 @@
   ~ limitations under the License.
   -->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-    <parent>
-        <groupId>org.apache.spark</groupId>
-        <artifactId>spark-parent</artifactId>
-        <version>1.0.0-SNAPSHOT</version>
-        <relativePath>../../pom.xml</relativePath>
-    </parent>
-
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-hive_2.10</artifactId>
-    <packaging>jar</packaging>
-    <name>Spark Project Hive</name>
-    <url>http://spark.apache.org/</url>
-    <profiles>
-      <profile>
-        <id>yarn-alpha</id>
-        <dependencies>
-          <dependency>
-            <groupId>org.apache.avro</groupId>
-            <artifactId>avro</artifactId>
-          </dependency>
-        </dependencies>
-      </profile>
-    </profiles>
+    <artifactId>spark-parent</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-hive_2.10</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Hive</name>
+  <url>http://spark.apache.org/</url>
 
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_${scala.binary.version}</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-sql_${scala.binary.version}</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hive</groupId>
-            <artifactId>hive-metastore</artifactId>
-            <version>${hive.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hive</groupId>
-            <artifactId>hive-exec</artifactId>
-            <version>${hive.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.codehaus.jackson</groupId>
-            <artifactId>jackson-mapper-asl</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hive</groupId>
-            <artifactId>hive-serde</artifactId>
-            <version>${hive.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.scalatest</groupId>
-            <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.scalacheck</groupId>
-            <artifactId>scalacheck_${scala.binary.version}</artifactId>
-            <scope>test</scope>
-        </dependency>
-    </dependencies>
-    <build>
-        <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
-        <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-        <plugins>
-            <plugin>
-                <groupId>org.scalatest</groupId>
-                <artifactId>scalatest-maven-plugin</artifactId>
-            </plugin>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-metastore</artifactId>
+      <version>${hive.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
+      <version>${hive.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-mapper-asl</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-serde</artifactId>
+      <version>${hive.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.scalatest</groupId>
+        <artifactId>scalatest-maven-plugin</artifactId>
+      </plugin>
 
-            <!-- Deploy datanucleus jars to the spark/lib_managed/jars directory -->
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-dependency-plugin</artifactId>
-                <version>2.4</version>
-                <executions>
-                    <execution>
-                        <id>copy-dependencies</id>
-                        <phase>package</phase>
-                        <goals>
-                            <goal>copy-dependencies</goal>
-                        </goals>
-                        <configuration>
-                            <!-- basedir is spark/sql/hive/ -->
-                            <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
-                            <overWriteReleases>false</overWriteReleases>
-                            <overWriteSnapshots>false</overWriteSnapshots>
-                            <overWriteIfNewer>true</overWriteIfNewer>
-                            <includeGroupIds>org.datanucleus</includeGroupIds>
-                        </configuration>
-                    </execution>
-                </executions>
-            </plugin>
-        </plugins>
-    </build>
+      <!-- Deploy datanucleus jars to the spark/lib_managed/jars directory -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>copy-dependencies</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <!-- basedir is spark/sql/hive/ -->
+              <outputDirectory>${basedir}/../../lib_managed/jars</outputDirectory>
+              <overWriteReleases>false</overWriteReleases>
+              <overWriteSnapshots>false</overWriteSnapshots>
+              <overWriteIfNewer>true</overWriteIfNewer>
+              <includeGroupIds>org.datanucleus</includeGroupIds>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
 </project>
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 93b1c5a37a..6435224a14 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project Streaming</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/tools/pom.xml b/tools/pom.xml
index ae2ba64e07..1875c497bc 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -30,20 +30,6 @@
   <name>Spark Project Tools</name>
   <url>http://spark.apache.org/</url>
 
-  <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-       a Hadoop 0.23.X issue -->
-  <profiles>
-    <profile>
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/yarn/alpha/pom.xml b/yarn/alpha/pom.xml
index d0aeaceb0d..e076ca1d44 100644
--- a/yarn/alpha/pom.xml
+++ b/yarn/alpha/pom.xml
@@ -24,20 +24,6 @@
     <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-yarn-alpha_2.10</artifactId>
   <packaging>jar</packaging>
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3342cb65ed..02f3662743 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -70,15 +70,6 @@
       <modules>
         <module>alpha</module>
       </modules>
-
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <dependencies>
-         <dependency>
-           <groupId>org.apache.avro</groupId>
-           <artifactId>avro</artifactId>
-         </dependency>
-      </dependencies>
     </profile>
 
     <profile>
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index e7915d12ae..0780f251b5 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -24,20 +24,6 @@
     <relativePath>../pom.xml</relativePath>
   </parent>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-yarn_2.10</artifactId>
   <packaging>jar</packaging>
-- 
GitLab