about | summary | refs | log | tree | commit | diff
path: root/make-distribution.sh
diff options
context:
space:
mode:
author Patrick Wendell <pwendell@gmail.com> 2014-04-23 10:19:32 -0700
committer Patrick Wendell <pwendell@gmail.com> 2014-04-23 10:19:43 -0700
commit d36d75cbd9020fefee885a940f4deccef2c75a9d (patch)
tree c0a0fef97cb46ceec761e8941d1a3fb44724cde7 /make-distribution.sh
parent b0d8793205f3e9dace8be83cc57444b101395254 (diff)
download spark-d36d75cbd9020fefee885a940f4deccef2c75a9d.tar.gz
spark-d36d75cbd9020fefee885a940f4deccef2c75a9d.tar.bz2
spark-d36d75cbd9020fefee885a940f4deccef2c75a9d.zip
SPARK-1119 and other build improvements
1. Makes assembly and examples jar naming consistent in maven/sbt.
2. Updates make-distribution.sh to use Maven and fixes some bugs.
3. Updates the create-release script to call make-distribution script.

Author: Patrick Wendell <pwendell@gmail.com>

Closes #502 from pwendell/make-distribution and squashes the following commits:

1a97f0d [Patrick Wendell] SPARK-1119 and other build improvements

(cherry picked from commit cd4ed293262e2349794c13467d1737974385c019)
Signed-off-by: Patrick Wendell <pwendell@gmail.com>
Diffstat (limited to 'make-distribution.sh')
-rwxr-xr-x make-distribution.sh 70
1 files changed, 47 insertions, 23 deletions
diff --git a/make-distribution.sh b/make-distribution.sh
index 5c780fcbda..83dfc74585 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -28,6 +28,8 @@
# --tgz: Additionally creates spark-$VERSION-bin.tar.gz
# --hadoop VERSION: Builds against specified version of Hadoop.
# --with-yarn: Enables support for Hadoop YARN.
+# --with-hive: Enable support for reading Hive tables.
+# --name: A moniker for the release target. Defaults to the Hadoop version.
#
# Recommended deploy/testing procedure (standalone mode):
# 1) Rsync / deploy the dist/ dir to one host
@@ -41,25 +43,20 @@
FWDIR="$(cd `dirname $0`; pwd)"
DISTDIR="$FWDIR/dist"
-# Get version from SBT
-export TERM=dumb # Prevents color codes in SBT output
-
-VERSIONSTRING=$($FWDIR/sbt/sbt "show version")
-
+VERSION=$(mvn help:evaluate -Dexpression=project.version |grep -v "INFO")
if [ $? == -1 ] ;then
- echo -e "You need sbt installed and available on your path."
- echo -e "Download sbt from http://www.scala-sbt.org/"
+ echo -e "You need Maven installed to build Spark."
+ echo -e "Download Maven from https://maven.apache.org."
exit -1;
fi
-VERSION=$(echo "${VERSIONSTRING}" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
-echo "Version is ${VERSION}"
-
# Initialize defaults
SPARK_HADOOP_VERSION=1.0.4
SPARK_YARN=false
+SPARK_HIVE=false
SPARK_TACHYON=false
MAKE_TGZ=false
+NAME=none
# Parse arguments
while (( "$#" )); do
@@ -71,23 +68,37 @@ while (( "$#" )); do
--with-yarn)
SPARK_YARN=true
;;
+ --with-hive)
+ SPARK_HIVE=true
+ ;;
--with-tachyon)
SPARK_TACHYON=true
;;
--tgz)
MAKE_TGZ=true
;;
+ --name)
+ NAME="$2"
+ shift
+ ;;
esac
shift
done
+if [ "$NAME" == "none" ]; then
+ NAME=$SPARK_HADOOP_VERSION
+fi
+
+echo "Spark version is $VERSION"
+
if [ "$MAKE_TGZ" == "true" ]; then
- echo "Making spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz"
+ echo "Making spark-$VERSION-bin-$NAME.tgz"
else
- echo "Making distribution for Spark $VERSION in $DISTDIR..."
+ echo "Making distribution for Spark $VERSION in $DISTDIR..."
fi
echo "Hadoop version set to $SPARK_HADOOP_VERSION"
+echo "Release name set to $NAME"
if [ "$SPARK_YARN" == "true" ]; then
echo "YARN enabled"
else
@@ -100,20 +111,32 @@ else
echo "Tachyon Disabled"
fi
-# Build fat JAR
-export SPARK_HADOOP_VERSION
-export SPARK_YARN
+# Build uber fat JAR
cd $FWDIR
-"sbt/sbt" "assembly/assembly"
+export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
+
+if [ "$SPARK_HIVE" == "true" ]; then
+ MAYBE_HIVE="-Phive"
+else
+ MAYBE_HIVE=""
+fi
+
+if [ "$SPARK_YARN" == "true" ]; then
+ mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
+ -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+else
+ mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+fi
# Make directories
rm -rf "$DISTDIR"
-mkdir -p "$DISTDIR/jars"
+mkdir -p "$DISTDIR/lib"
echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
# Copy jars
-cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/jars/"
+cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
+cp $FWDIR/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
# Copy other things
mkdir "$DISTDIR"/conf
@@ -135,16 +158,16 @@ if [ "$SPARK_TACHYON" == "true" ]; then
wget "$TACHYON_URL"
tar xf "tachyon-${TACHYON_VERSION}-bin.tar.gz"
- cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/jars"
+ cp "tachyon-${TACHYON_VERSION}/target/tachyon-${TACHYON_VERSION}-jar-with-dependencies.jar" "$DISTDIR/lib"
mkdir -p "$DISTDIR/tachyon/src/main/java/tachyon/web"
cp -r "tachyon-${TACHYON_VERSION}"/{bin,conf,libexec} "$DISTDIR/tachyon"
cp -r "tachyon-${TACHYON_VERSION}"/src/main/java/tachyon/web/resources "$DISTDIR/tachyon/src/main/java/tachyon/web"
if [[ `uname -a` == Darwin* ]]; then
# need to run sed differently on osx
- nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
+ nl=$'\n'; sed -i "" -e "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\\$nl export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
else
- sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n export TACHYON_JAR=\$TACHYON_HOME/../jars/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
+ sed -i "s|export TACHYON_JAR=\$TACHYON_HOME/target/\(.*\)|# This is set for spark's make-distribution\n export TACHYON_JAR=\$TACHYON_HOME/../lib/\1|" "$DISTDIR/tachyon/libexec/tachyon-config.sh"
fi
popd > /dev/null
@@ -152,8 +175,9 @@ if [ "$SPARK_TACHYON" == "true" ]; then
fi
if [ "$MAKE_TGZ" == "true" ]; then
- TARDIR="$FWDIR/spark-$VERSION"
+ TARDIR_NAME=spark-$VERSION-bin-$NAME
+ TARDIR="$FWDIR/$TARDIR_NAME"
cp -r "$DISTDIR" "$TARDIR"
- tar -zcf "spark-$VERSION-hadoop_$SPARK_HADOOP_VERSION-bin.tar.gz" -C "$FWDIR" "spark-$VERSION"
+ tar czf "spark-$VERSION-bin-$NAME.tgz" -C "$FWDIR" "$TARDIR_NAME"
rm -rf "$TARDIR"
fi