-rwxr-xr-x  make-distribution.sh      53
-rw-r--r--  project/SparkBuild.scala  28
2 files changed, 64 insertions, 17 deletions
diff --git a/make-distribution.sh b/make-distribution.sh
index 0a8941c1f8..a101024de5 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -24,9 +24,10 @@
# so it is completely self contained.
# It does not contain source or *.class files.
#
-# Arguments
-# (none): Creates dist/ directory
-# tgz: Additionally creates spark-$VERSION-bin.tar.gz
+# Optional Arguments
+# --tgz: Additionally creates spark-$VERSION-bin.tar.gz
+# --hadoop VERSION: Builds against specified version of Hadoop.
+# --with-yarn: Enables support for Hadoop YARN.
#
# Recommended deploy/testing procedure (standalone mode):
# 1) Rsync / deploy the dist/ dir to one host
@@ -44,20 +45,50 @@ DISTDIR="$FWDIR/dist"
export TERM=dumb # Prevents color codes in SBT output
VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
-if [ "$1" == "tgz" ]; then
- echo "Making spark-$VERSION-bin.tar.gz"
+# Initialize defaults
+SPARK_HADOOP_VERSION=1.2.1
+SPARK_YARN_MODE=false
+MAKE_TGZ=false
+
+# Parse arguments
+while (( "$#" )); do
+ case $1 in
+ --hadoop)
+ SPARK_HADOOP_VERSION="$2"
+ shift
+ ;;
+ --with-yarn)
+ SPARK_YARN_MODE=true
+ ;;
+ --tgz)
+ MAKE_TGZ=true
+ ;;
+ esac
+ shift
+done
+
+if [ "$MAKE_TGZ" == "true" ]; then
+ echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
else
echo "Making distribution for Spark $VERSION in $DISTDIR..."
fi
+echo "Hadoop version set to $SPARK_HADOOP_VERSION"
+if [ "$SPARK_YARN_MODE" == "true" ]; then
+ echo "YARN enabled"
+else
+ echo "YARN disabled"
+fi
# Build fat JAR
-$FWDIR/sbt/sbt "repl/assembly"
+export SPARK_HADOOP_VERSION
+export SPARK_YARN_MODE
+"$FWDIR/sbt/sbt" "repl/assembly"
# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/jars"
-echo "$VERSION" >$DISTDIR/RELEASE
+echo "$VERSION" > "$DISTDIR/RELEASE"
# Copy jars
cp $FWDIR/repl/target/*.jar "$DISTDIR/jars/"
@@ -69,9 +100,9 @@ cp "$FWDIR/run" "$FWDIR/spark-shell" "$DISTDIR"
cp "$FWDIR/spark-executor" "$DISTDIR"
-if [ "$1" == "tgz" ]; then
+if [ "$MAKE_TGZ" == "true" ]; then
TARDIR="$FWDIR/spark-$VERSION"
- cp -r $DISTDIR $TARDIR
- tar -zcf spark-$VERSION-bin.tar.gz -C $FWDIR spark-$VERSION
- rm -rf $TARDIR
+ cp -r "$DISTDIR" "$TARDIR"
+ tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION"
+ rm -rf "$TARDIR"
fi
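
# For reference, the reworked script can now be driven entirely from the command
# line. A minimal sketch of an invocation, reusing the 2.0.2-alpha version string
# that the YARN dependencies previously hard-coded (any supported Hadoop release
# could be substituted):
#
#   # YARN-enabled distribution against Hadoop 2.0.2-alpha, packaged as a tarball
#   ./make-distribution.sh --hadoop 2.0.2-alpha --with-yarn --tgz
#
#   # With no arguments, the defaults above apply: Hadoop 1.2.1, YARN disabled,
#   # and only the dist/ directory is produced
#   ./make-distribution.sh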
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 350a36a964..23c7179919 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -24,10 +24,15 @@ import AssemblyKeys._
//import com.jsuereth.pgp.sbtplugin.PgpKeys._
object SparkBuild extends Build {
+ // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or
+ // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop.
+ val HADOOP_VERSION = "1.2.1"
+ val HADOOP_YARN = false
+
// HBase version; set as appropriate.
val HBASE_VERSION = "0.94.6"
- lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib, tools, yarn)
+ lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects:_*)
lazy val core = Project("core", file("core"), settings = coreSettings)
@@ -49,6 +54,17 @@ object SparkBuild extends Build {
lazy val MavenCompile = config("m2r") extend(Compile)
lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy")
+ // Allows build configuration to be set through environment variables
+ lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", HADOOP_VERSION)
+ lazy val isYarnMode = scala.util.Properties.envOrNone("SPARK_YARN_MODE") match {
+ case None => HADOOP_YARN
+ case Some(v) => v.toBoolean
+ }
+
+ // Conditionally include the yarn sub-project
+ lazy val maybeYarn = if(isYarnMode) Seq[ProjectReference](yarn) else Seq[ProjectReference]()
+ lazy val allProjects = Seq[ProjectReference](core, repl, examples, bagel, streaming, mllib, tools) ++ maybeYarn
+
def sharedSettings = Defaults.defaultSettings ++ Seq(
organization := "org.spark-project",
version := "0.8.0-SNAPSHOT",
@@ -170,7 +186,7 @@ object SparkBuild extends Build {
"org.apache.mesos" % "mesos" % "0.12.1",
"io.netty" % "netty-all" % "4.0.0.Beta2",
"org.apache.derby" % "derby" % "10.4.2.0" % "test",
- "org.apache.hadoop" % "hadoop-client" % "1.2.1" excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
"com.codahale.metrics" % "metrics-core" % "3.0.0",
"com.codahale.metrics" % "metrics-jvm" % "3.0.0",
"com.codahale.metrics" % "metrics-json" % "3.0.0",
@@ -239,10 +255,10 @@ object SparkBuild extends Build {
name := "spark-yarn",
libraryDependencies ++= Seq(
// Exclude rule required for all ?
- "org.apache.hadoop" % "hadoop-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm),
- "org.apache.hadoop" % "hadoop-yarn-api" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm),
- "org.apache.hadoop" % "hadoop-yarn-common" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm),
- "org.apache.hadoop" % "hadoop-yarn-client" % "2.0.2-alpha" excludeAll(excludeJackson, excludeNetty, excludeAsm)
+ "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm),
+ "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm)
)
) ++ assemblySettings ++ extraAssemblySettings
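
// Because SparkBuild.scala resolves these settings through envOrElse/envOrNone,
// the same overrides work when invoking sbt directly, without going through
// make-distribution.sh. A sketch, reusing version strings already mentioned in
// the build file's comments and dependency lists:
//
//   # Build the assembly against a different Apache Hadoop release (YARN stays off by default)
//   SPARK_HADOOP_VERSION=1.0.4 sbt/sbt repl/assembly
//
//   # Build against a YARN-capable Hadoop release with the yarn sub-project included
//   SPARK_HADOOP_VERSION=2.0.2-alpha SPARK_YARN_MODE=true sbt/sbt repl/assembly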