Skip to content
Snippets Groups Projects
Commit 3f98eff6 authored by Jey Kottalam
Browse files

Allow make-distribution.sh to specify Hadoop version used

parent a0f08484
No related branches found
No related tags found
No related merge requests found
......@@ -24,9 +24,10 @@
# so it is completely self contained.
# It does not contain source or *.class files.
#
# Arguments
# (none): Creates dist/ directory
# tgz: Additionally creates spark-$VERSION-bin.tar.gz
# Optional Arguments
# --tgz: Additionally creates spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz
# --hadoop VERSION: Builds against specified version of Hadoop (default: 1.2.1).
# --with-yarn: Enables support for Hadoop YARN (disabled by default).
#
# Recommended deploy/testing procedure (standalone mode):
# 1) Rsync / deploy the dist/ dir to one host
......@@ -44,20 +45,50 @@ DISTDIR="$FWDIR/dist"
export TERM=dumb # Prevents color codes in SBT output
VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
if [ "$1" == "tgz" ]; then
echo "Making spark-$VERSION-bin.tar.gz"
# Initialize defaults
SPARK_HADOOP_VERSION=1.2.1
SPARK_YARN_MODE=false
MAKE_TGZ=false

# Parse command-line arguments into the settings initialized above.
# Globals:   SPARK_HADOOP_VERSION, SPARK_YARN_MODE, MAKE_TGZ (written)
# Arguments: the script's own arguments ("$@"). Recognized options:
#              --hadoop VERSION   sets SPARK_HADOOP_VERSION
#              --with-yarn        sets SPARK_YARN_MODE=true
#              --tgz              sets MAKE_TGZ=true
# Outputs:   diagnostics on stderr
# Returns:   exits the script with status 1 if --hadoop lacks a value;
#            unrecognized arguments are reported and otherwise ignored.
parse_args() {
  while (( $# )); do
    case "$1" in
      --hadoop)
        # Require a real value. Without this check a trailing "--hadoop"
        # would store an empty version, and "--hadoop --tgz" would consume
        # the next flag as the version string.
        if [[ -z "${2:-}" || "$2" == -* ]]; then
          echo "Error: --hadoop requires a VERSION argument" >&2
          exit 1
        fi
        SPARK_HADOOP_VERSION="$2"
        shift
        ;;
      --with-yarn)
        SPARK_YARN_MODE=true
        ;;
      --tgz)
        MAKE_TGZ=true
        ;;
      *)
        # Keep going (as before), but make the dropped argument visible so
        # a mistyped option does not pass unnoticed.
        echo "Warning: ignoring unrecognized argument '$1'" >&2
        ;;
    esac
    shift
  done
}
parse_args "$@"
# Report what is about to be produced: either the tarball name or the
# distribution directory, depending on whether --tgz was requested.
case "$MAKE_TGZ" in
  true)
    echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
    ;;
  *)
    echo "Making distribution for Spark $VERSION in $DISTDIR..."
    ;;
esac

# Echo the effective Hadoop/YARN settings so they appear in the build log.
echo "Hadoop version set to $SPARK_HADOOP_VERSION"
case "$SPARK_YARN_MODE" in
  true)
    echo "YARN enabled"
    ;;
  *)
    echo "YARN disabled"
    ;;
esac
# Build fat JAR
$FWDIR/sbt/sbt "repl/assembly"
export SPARK_HADOOP_VERSION
export SPARK_YARN_MODE
"$FWDIR/sbt/sbt" "repl/assembly"
# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/jars"
echo "$VERSION" >$DISTDIR/RELEASE
echo "$VERSION" > "$DISTDIR/RELEASE"
# Copy jars
cp $FWDIR/repl/target/*.jar "$DISTDIR/jars/"
......@@ -69,9 +100,9 @@ cp "$FWDIR/run" "$FWDIR/spark-shell" "$DISTDIR"
cp "$FWDIR/spark-executor" "$DISTDIR"
if [ "$1" == "tgz" ]; then
if [ "$MAKE_TGZ" == "true" ]; then
TARDIR="$FWDIR/spark-$VERSION"
cp -r $DISTDIR $TARDIR
tar -zcf spark-$VERSION-bin.tar.gz -C $FWDIR spark-$VERSION
rm -rf $TARDIR
cp -r "$DISTDIR" "$TARDIR"
tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION"
rm -rf "$TARDIR"
fi
......@@ -24,10 +24,15 @@ import AssemblyKeys._
//import com.jsuereth.pgp.sbtplugin.PgpKeys._
object SparkBuild extends Build {
// Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or
// "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop.
// This is the default; the SPARK_HADOOP_VERSION environment variable, when
// set, takes precedence.
val HADOOP_VERSION = "1.2.1"
// Default for whether the yarn sub-project is part of the build; the
// SPARK_YARN_MODE environment variable, when set, takes precedence.
val HADOOP_YARN = false
// HBase version; set as appropriate.
val HBASE_VERSION = "0.94.6"
lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib, tools, yarn)
lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects:_*)
lazy val core = Project("core", file("core"), settings = coreSettings)
......@@ -49,6 +54,17 @@ object SparkBuild extends Build {
lazy val MavenCompile = config("m2r") extend(Compile)
lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy")
// Build configuration may be overridden through environment variables:
// SPARK_HADOOP_VERSION replaces HADOOP_VERSION, and SPARK_YARN_MODE
// (parsed with String.toBoolean) replaces HADOOP_YARN.
lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", HADOOP_VERSION)
lazy val isYarnMode = scala.util.Properties.envOrNone("SPARK_YARN_MODE").map(_.toBoolean).getOrElse(HADOOP_YARN)

// The yarn sub-project joins the aggregated project list only in YARN mode.
lazy val maybeYarn = if (isYarnMode) Seq[ProjectReference](yarn) else Seq.empty[ProjectReference]
lazy val allProjects = Seq[ProjectReference](core, repl, examples, bagel, streaming, mllib, tools) ++ maybeYarn
def sharedSettings = Defaults.defaultSettings ++ Seq(
organization := "org.spark-project",
version := "0.8.0-SNAPSHOT",
......@@ -170,7 +186,7 @@ object SparkBuild extends Build {
"org.apache.mesos" % "mesos" % "0.12.1",
"io.netty" % "netty-all" % "4.0.0.Beta2",
"org.apache.derby" % "derby" % "10.4.2.0" % "test",
"org.apache.hadoop" % "hadoop-client" % "1.2.1" excludeAll(excludeJackson, excludeNetty, excludeAsm),
"org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
"com.codahale.metrics" % "metrics-core" % "3.0.0",
"com.codahale.metrics" % "metrics-jvm" % "3.0.0",
"com.codahale.metrics" % "metrics-json" % "3.0.0",
......@@ -239,10 +255,10 @@ object SparkBuild extends Build {
name := "spark-yarn",
libraryDependencies ++= Seq(
// Exclude rule required for all ?
"org.apache.hadoop" % "hadoop-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm),
"org.apache.hadoop" % "hadoop-yarn-api" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm),
"org.apache.hadoop" % "hadoop-yarn-common" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm),
"org.apache.hadoop" % "hadoop-yarn-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm)
"org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
"org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
"org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
"org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm)
)
) ++ assemblySettings ++ extraAssemblySettings
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment