author     Patrick Wendell <patrick@databricks.com>    2015-08-11 21:16:48 -0700
committer  Patrick Wendell <patrick@databricks.com>    2015-08-11 21:16:48 -0700
commit     3ef0f32928fc383ad3edd5ad167212aeb9eba6e1 (patch)
tree       2390156671462ccecca82adab9e93625ea9ea584    /dev/create-release/release-build.sh
parent     ca8f70e9d473d2c81866f3c330cc6545c33bdac7 (diff)
[SPARK-1517] Refactor release scripts to facilitate nightly publishing
This update contains some code changes to the release scripts that allow easier nightly publishing. I've been using these new scripts on Jenkins for cutting and publishing nightly snapshots for the last month or so, and it has been going well. I'd like to get them merged back upstream so this can be maintained by the community.

The main changes are:
1. Separates release tagging from the various build possibilities for an already tagged release (`release-tag.sh` and `release-build.sh`).
2. Allows credentials, including GPG keys, to be injected through the environment. This is paired with secure key injection in Jenkins.
3. Supports copying build results to a remote directory, and also "rotating" results, e.g. the ability to keep the last N copies of binary or doc builds.

I'm happy if anyone wants to take a look at this - it's not user facing but an internal utility used for generating releases.

Author: Patrick Wendell <patrick@databricks.com>

Closes #7411 from pwendell/release-script-updates and squashes the following commits:

74f9beb [Patrick Wendell] Moving maven build command to a variable
233ce85 [Patrick Wendell] [SPARK-1517] Refactor release scripts to facilitate nightly publishing
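As an illustration of the environment-driven invocation this patch enables, a nightly Jenkins job might call the new script roughly as follows (all values below are hypothetical placeholders, not taken from this patch):

    GIT_REF=master \
    REMOTE_PARENT_DIR=/home/pwendell/public_html/nightly \
    REMOTE_PARENT_MAX_LENGTH=5 \
    ASF_USERNAME=pwendell \
    ASF_RSA_KEY=/path/to/id_rsa \
    GPG_KEY=ABCD1234 \
    GPG_PASSPHRASE=... \
    dev/create-release/release-build.sh package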
Diffstat (limited to 'dev/create-release/release-build.sh')
-rwxr-xr-x  dev/create-release/release-build.sh  321
1 file changed, 321 insertions(+), 0 deletions(-)
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
new file mode 100755
index 0000000000..399c73e7bf
--- /dev/null
+++ b/dev/create-release/release-build.sh
@@ -0,0 +1,321 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+function exit_with_usage {
+ cat << EOF
+usage: release-build.sh <package|docs|publish-snapshot|publish-release>
+Creates build deliverables from a Spark commit.
+
+Top-level targets are:
+ package: Create binary packages and copy them to people.apache.org
+ docs: Build docs and copy them to people.apache.org
+ publish-snapshot: Publish snapshot release to Apache snapshots
+ publish-release: Publish a release to Apache release repo
+
+All other inputs are environment variables
+
+GIT_REF - Release tag or commit to build from
+SPARK_VERSION - Release identifier used when publishing
+SPARK_PACKAGE_VERSION - Release identifier in top level package directory
+REMOTE_PARENT_DIR - Parent in which to create doc or release builds.
+REMOTE_PARENT_MAX_LENGTH - If set, parent directory will be cleaned to only
+ have this number of subdirectories (by deleting old ones). WARNING: This deletes data.
+
+ASF_USERNAME - Username of ASF committer account
+ASF_PASSWORD - Password of ASF committer account
+ASF_RSA_KEY - RSA private key file for ASF committer account
+
+GPG_KEY - GPG key used to sign release artifacts
+GPG_PASSPHRASE - Passphrase for GPG key
+EOF
+ exit 1
+}
+
+set -e
+
+if [ $# -eq 0 ]; then
+ exit_with_usage
+fi
+
+if [[ $@ == *"help"* ]]; then
+ exit_with_usage
+fi
+
+for env in ASF_USERNAME ASF_RSA_KEY GPG_PASSPHRASE GPG_KEY; do
+ if [ -z "${!env}" ]; then
+ echo "ERROR: $env must be set to run this script"
+ exit_with_usage
+ fi
+done
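+
+# Note: "${!env}" above is bash indirect expansion: with env=GPG_KEY it expands
+# to the value of $GPG_KEY. A minimal illustration (hypothetical):
+#   foo=bar; name=foo; echo "${!name}"   # prints "bar"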
+
+# Commit ref to checkout when building
+GIT_REF=${GIT_REF:-master}
+
+# Destination directory parent on remote server
+REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}
+
+SSH="ssh -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
+GPG="gpg --no-tty --batch"
+NEXUS_ROOT=https://repository.apache.org/service/local/staging
+NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
+BASE_DIR=$(pwd)
+
+MVN="build/mvn --force"
+PUBLISH_PROFILES="-Pyarn -Phive -Phadoop-2.2"
+PUBLISH_PROFILES="$PUBLISH_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
+
+rm -rf spark
+git clone https://git-wip-us.apache.org/repos/asf/spark.git
+cd spark
+git checkout $GIT_REF
+git_hash=$(git rev-parse --short HEAD)
+echo "Checked out Spark git hash $git_hash"
+
+if [ -z "$SPARK_VERSION" ]; then
+ SPARK_VERSION=$($MVN help:evaluate -Dexpression=project.version \
+ | grep -v INFO | grep -v WARNING | grep -v Download)
+fi
+
+if [ -z "$SPARK_PACKAGE_VERSION" ]; then
+ SPARK_PACKAGE_VERSION="${SPARK_VERSION}-$(date +%Y_%m_%d_%H_%M)-${git_hash}"
+fi
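+
+# For example (hypothetical), a nightly of 1.5.0-SNAPSHOT built on 2015-08-11
+# at 21:16 from commit 3ef0f32 would produce:
+#   SPARK_PACKAGE_VERSION=1.5.0-SNAPSHOT-2015_08_11_21_16-3ef0f32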
+
+DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"
+USER_HOST="$ASF_USERNAME@people.apache.org"
+
+rm .gitignore
+rm -rf .git
+cd ..
+
+if [ -n "$REMOTE_PARENT_MAX_LENGTH" ]; then
+ old_dirs=$($SSH $USER_HOST ls -t $REMOTE_PARENT_DIR | tail -n +$REMOTE_PARENT_MAX_LENGTH)
+ for old_dir in $old_dirs; do
+ echo "Removing directory: $old_dir"
+ $SSH $USER_HOST rm -r $REMOTE_PARENT_DIR/$old_dir
+ done
+fi
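+
+# Note: `ls -t` lists newest first and `tail -n +N` prints from the Nth line
+# onward, so the N-1 newest directories are kept and everything older is
+# deleted; the build below then adds one more, leaving at most
+# $REMOTE_PARENT_MAX_LENGTH directories in place after a run.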
+
+if [[ "$1" == "package" ]]; then
+ # Source and binary tarballs
+ echo "Packaging release tarballs"
+ cp -r spark spark-$SPARK_VERSION
+ tar cvzf spark-$SPARK_VERSION.tgz spark-$SPARK_VERSION
+ echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour --output spark-$SPARK_VERSION.tgz.asc \
+ --detach-sig spark-$SPARK_VERSION.tgz
+ echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md MD5 spark-$SPARK_VERSION.tgz > \
+ spark-$SPARK_VERSION.tgz.md5
+ echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
+ SHA512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha
+ rm -rf spark-$SPARK_VERSION
+
+ # Updated for each binary build
+ make_binary_release() {
+ NAME=$1
+ FLAGS=$2
+ ZINC_PORT=$3
+ cp -r spark spark-$SPARK_VERSION-bin-$NAME
+
+ cd spark-$SPARK_VERSION-bin-$NAME
+
+ # TODO There should probably be a flag to make-distribution to allow 2.11 support
+ if [[ $FLAGS == *scala-2.11* ]]; then
+ ./dev/change-scala-version.sh 2.11
+ fi
+
+ export ZINC_PORT=$ZINC_PORT
+ echo "Creating distribution: $NAME ($FLAGS)"
+ ./make-distribution.sh --name $NAME --tgz $FLAGS -DzincPort=$ZINC_PORT \
+   > ../binary-release-$NAME.log 2>&1
+ cd ..
+ cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz .
+
+ echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
+ --output spark-$SPARK_VERSION-bin-$NAME.tgz.asc \
+ --detach-sig spark-$SPARK_VERSION-bin-$NAME.tgz
+ echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
+ MD5 spark-$SPARK_VERSION-bin-$NAME.tgz > \
+ spark-$SPARK_VERSION-bin-$NAME.tgz.md5
+ echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
+ SHA512 spark-$SPARK_VERSION-bin-$NAME.tgz > \
+ spark-$SPARK_VERSION-bin-$NAME.tgz.sha
+ }
+
+ # TODO: Check exit codes of children here:
+ # http://stackoverflow.com/questions/1570262/shell-get-exit-code-of-background-process
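+ # A possible sketch (not enabled here): collect each background PID and wait
+ # on it individually so that any failed build aborts the run, e.g.:
+ #   pids=()
+ #   make_binary_release "hadoop1" "..." "3030" & pids+=($!)
+ #   ...
+ #   for pid in "${pids[@]}"; do wait "$pid" || exit 1; done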
+
+ # We increment the Zinc port each time to avoid OOMs and other craziness if
+ # multiple builds share the same Zinc server.
+ make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
+ make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
+ make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
+ make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
+ make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
+ make_binary_release "hadoop2.6" "-Psparkr -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn" "3034" &
+ make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn" "3037" &
+ make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn" "3038" &
+ wait
+ rm -rf spark-$SPARK_VERSION-bin-*/
+
+ # Copy data
+ dest_dir="$REMOTE_PARENT_DIR/${DEST_DIR_NAME}-bin"
+ echo "Copying release tarballs to $dest_dir"
+ $SSH $USER_HOST mkdir $dest_dir
+ rsync -e "$SSH" spark-* $USER_HOST:$dest_dir
+ echo "Linking /latest to $dest_dir"
+ $SSH $USER_HOST rm -f "$REMOTE_PARENT_DIR/latest"
+ $SSH $USER_HOST ln -s $dest_dir "$REMOTE_PARENT_DIR/latest"
+ exit 0
+fi
+
+if [[ "$1" == "docs" ]]; then
+ # Documentation
+ cd spark
+ echo "Building Spark docs"
+ dest_dir="$REMOTE_PARENT_DIR/${DEST_DIR_NAME}-docs"
+ cd docs
+ # Compile docs with Java 7 to get the nicer output format
+ # TODO: Make adding PRODUCTION=1 below configurable
+ PRODUCTION=1 RELEASE_VERSION="$SPARK_VERSION" jekyll build
+ echo "Copying release documentation to $dest_dir"
+ $SSH $USER_HOST mkdir $dest_dir
+ echo "Linking /latest to $dest_dir"
+ $SSH $USER_HOST rm -f "$REMOTE_PARENT_DIR/latest"
+ $SSH $USER_HOST ln -s $dest_dir "$REMOTE_PARENT_DIR/latest"
+ rsync -e "$SSH" -r _site/* $USER_HOST:$dest_dir
+ cd ..
+ exit 0
+fi
+
+if [[ "$1" == "publish-snapshot" ]]; then
+ cd spark
+ # Publish Spark to the Apache Maven snapshots repo
+ echo "Deploying Spark SNAPSHOT at '$GIT_REF' ($git_hash)"
+ echo "Publish version is $SPARK_VERSION"
+ if [[ ! $SPARK_VERSION == *"SNAPSHOT"* ]]; then
+ echo "ERROR: Snapshots must have a version containing SNAPSHOT"
+ echo "ERROR: You gave version '$SPARK_VERSION'"
+ exit 1
+ fi
+ # Coerce the requested version
+ $MVN versions:set -DnewVersion=$SPARK_VERSION
+ tmp_settings="tmp-settings.xml"
+ echo "<settings><servers><server>" > $tmp_settings
+ echo "<id>apache.snapshots.https</id><username>$ASF_USERNAME</username>" >> $tmp_settings
+ echo "<password>$ASF_PASSWORD</password>" >> $tmp_settings
+ echo "</server></servers></settings>" >> $tmp_settings
+
+ # Generate random port for Zinc
+ export ZINC_PORT=$(python -S -c "import random; print random.randrange(3030,4030)")
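+ # (Assumes a Python 2 interpreter; with Python 3 this would need to be
+ # print(random.randrange(3030, 4030)).)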
+
+ $MVN -DzincPort=$ZINC_PORT --settings $tmp_settings -DskipTests $PUBLISH_PROFILES \
+ -Phive-thriftserver deploy
+ ./dev/change-scala-version.sh 2.11
+ $MVN -DzincPort=$ZINC_PORT -Dscala-2.11 --settings $tmp_settings \
+ -DskipTests $PUBLISH_PROFILES deploy
+
+ # Clean-up Zinc nailgun process
+ /usr/sbin/lsof -P | grep $ZINC_PORT | grep LISTEN | awk '{ print $2; }' | xargs kill
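+ # (lsof -P prints numeric ports; the pipeline keeps the process LISTENing on
+ # $ZINC_PORT, takes its PID from the second column, and kills it.)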
+
+ rm $tmp_settings
+ cd ..
+ exit 0
+fi
+
+if [[ "$1" == "publish-release" ]]; then
+ cd spark
+ # Publish Spark to Maven release repo
+ echo "Publishing Spark checkout at '$GIT_REF' ($git_hash)"
+ echo "Publish version is $SPARK_VERSION"
+ # Coerce the requested version
+ $MVN versions:set -DnewVersion=$SPARK_VERSION
+
+ # Using Nexus API documented here:
+ # https://support.sonatype.com/entries/39720203-Uploading-to-a-Staging-Repository-via-REST-API
+ echo "Creating Nexus staging repository"
+ repo_request="<promoteRequest><data><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
+ out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
+ -H "Content-Type:application/xml" -v \
+ $NEXUS_ROOT/profiles/$NEXUS_PROFILE/start)
+ staged_repo_id=$(echo $out | sed -e "s/.*\(orgapachespark-[0-9]\{4\}\).*/\1/")
+ echo "Created Nexus staging repository: $staged_repo_id"
+
+ tmp_repo=$(mktemp -d spark-repo-XXXXX)
+
+ # Generate random port for Zinc
+ export ZINC_PORT=$(python -S -c "import random; print random.randrange(3030,4030)")
+
+ $MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -DskipTests $PUBLISH_PROFILES \
+ -Phive-thriftserver clean install
+
+ ./dev/change-scala-version.sh 2.11
+
+ $MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -Dscala-2.11 \
+ -DskipTests $PUBLISH_PROFILES clean install
+
+ # Clean-up Zinc nailgun process
+ /usr/sbin/lsof -P | grep $ZINC_PORT | grep LISTEN | awk '{ print $2; }' | xargs kill
+
+ ./dev/change-scala-version.sh 2.10
+
+ pushd $tmp_repo/org/apache/spark
+
+ # Remove any extra files generated during install
+ find . -type f | grep -v '\.jar' | grep -v '\.pom' | xargs rm
+
+ echo "Creating hash and signature files"
+ for file in $(find . -type f)
+ do
+ echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --output $file.asc \
+ --detach-sig --armour $file;
+ if [ $(command -v md5) ]; then
+ # Available on OS X; -q to keep only hash
+ md5 -q $file > $file.md5
+ else
+ # Available on Linux; cut to keep only hash
+ md5sum $file | cut -f1 -d' ' > $file.md5
+ fi
+ sha1sum $file | cut -f1 -d' ' > $file.sha1
+ done
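+
+ # After this loop every artifact has three sidecar files, e.g. (hypothetical):
+ #   spark-core_2.10-1.5.0.jar.asc  spark-core_2.10-1.5.0.jar.md5
+ #   spark-core_2.10-1.5.0.jar.sha1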
+
+ nexus_upload=$NEXUS_ROOT/deployByRepositoryId/$staged_repo_id
+ echo "Uplading files to $nexus_upload"
+ for file in $(find . -type f)
+ do
+ # strip leading ./
+ file_short=$(echo $file | sed -e "s/\.\///")
+ dest_url="$nexus_upload/org/apache/spark/$file_short"
+ echo " Uploading $file_short"
+ curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
+ done
+
+ echo "Closing nexus staging repository"
+ repo_request="<promoteRequest><data><stagedRepositoryId>$staged_repo_id</stagedRepositoryId><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
+ out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
+ -H "Content-Type:application/xml" -v \
+ $NEXUS_ROOT/profiles/$NEXUS_PROFILE/finish)
+ echo "Closed Nexus staging repository: $staged_repo_id"
+ popd
+ rm -rf $tmp_repo
+ cd ..
+ exit 0
+fi
+
+# We are already back in the base directory at this point; remove the clone
+# and fail, since $1 matched none of the targets above.
+rm -rf spark
+echo "ERROR: expects to be called with 'package', 'docs', 'publish-snapshot' or 'publish-release'"
+exit 1