#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

function exit_with_usage {
  cat << EOF
usage: release-build.sh <package|docs|publish-snapshot|publish-release>
Creates build deliverables from a Spark commit.

Top level targets are
  package: Create binary packages and copy them to home.apache
  docs: Build docs and copy them to home.apache
  publish-snapshot: Publish snapshot release to Apache snapshots
  publish-release: Publish a release to Apache release repo

All other inputs are environment variables

GIT_REF - Release tag or commit to build from
SPARK_VERSION - Release identifier used when publishing
SPARK_PACKAGE_VERSION - Release identifier in top level package directory
REMOTE_PARENT_DIR - Parent in which to create doc or release builds.
REMOTE_PARENT_MAX_LENGTH - If set, parent directory will be cleaned to only
 have this number of subdirectories (by deleting old ones). WARNING: This deletes data.

ASF_USERNAME - Username of ASF committer account
ASF_PASSWORD - Password of ASF committer account
ASF_RSA_KEY - RSA private key file for ASF committer account

GPG_KEY - GPG key used to sign release artifacts
GPG_PASSPHRASE - Passphrase for GPG key
EOF
  exit 1
}

set -e

if [ $# -eq 0 ]; then
  exit_with_usage
fi

if [[ $@ == *"help"* ]]; then
  exit_with_usage
fi

for env in ASF_USERNAME ASF_RSA_KEY GPG_PASSPHRASE GPG_KEY; do
  if [ -z "${!env}" ]; then
    echo "ERROR: $env must be set to run this script"
    exit_with_usage
  fi
done
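# As an illustration, a packaging run might be invoked roughly like this. All
# credential values below are placeholders, not real accounts or keys:
#
#   GIT_REF=v2.1.0 ASF_USERNAME=someuser ASF_PASSWORD=... \
#   ASF_RSA_KEY=$HOME/.ssh/id_rsa GPG_KEY=ABCD1234 GPG_PASSPHRASE=... \
#   ./release-build.sh package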
# Explicitly set locale in order to make `sort` output consistent across machines.
# See https://stackoverflow.com/questions/28881 for more details.
export LC_ALL=C

# Commit ref to checkout when building
GIT_REF=${GIT_REF:-master}

# Destination directory parent on remote server
REMOTE_PARENT_DIR=${REMOTE_PARENT_DIR:-/home/$ASF_USERNAME/public_html}

GPG="gpg --no-tty --batch"
NEXUS_ROOT=https://repository.apache.org/service/local/staging
NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
BASE_DIR=$(pwd)

MVN="build/mvn --force"
PUBLISH_PROFILES="-Pmesos -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2"
PUBLISH_PROFILES="$PUBLISH_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"

rm -rf spark
git clone https://git-wip-us.apache.org/repos/asf/spark.git
cd spark
git checkout $GIT_REF
git_hash=`git rev-parse --short HEAD`
echo "Checked out Spark git hash $git_hash"

if [ -z "$SPARK_VERSION" ]; then
  SPARK_VERSION=$($MVN help:evaluate -Dexpression=project.version \
    | grep -v INFO | grep -v WARNING | grep -v Download)
fi

if [ -z "$SPARK_PACKAGE_VERSION" ]; then
  SPARK_PACKAGE_VERSION="${SPARK_VERSION}-$(date +%Y_%m_%d_%H_%M)-${git_hash}"
fi

DEST_DIR_NAME="spark-$SPARK_PACKAGE_VERSION"

function LFTP {
  SSH="ssh -o ConnectTimeout=300 -o StrictHostKeyChecking=no -i $ASF_RSA_KEY"
  COMMANDS=$(cat <<EOF
     set net:max-retries 1 &&
     set sftp:connect-program $SSH &&
     connect -u $ASF_USERNAME,p sftp://home.apache.org &&
     $@
EOF
)
  lftp --norc -c "$COMMANDS"
}

if [[ "$1" == "package" ]]; then
  # Source and binary tarballs
  echo "Packaging release tarballs"
  cp -r spark spark-$SPARK_VERSION
  tar cvzf spark-$SPARK_VERSION.tgz spark-$SPARK_VERSION
  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour --output spark-$SPARK_VERSION.tgz.asc \
    --detach-sig spark-$SPARK_VERSION.tgz
  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md MD5 spark-$SPARK_VERSION.tgz > \
    spark-$SPARK_VERSION.tgz.md5
  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
    SHA512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha
  rm -rf spark-$SPARK_VERSION

  # Updated for each binary build
  make_binary_release() {
    NAME=$1
    FLAGS=$2
    ZINC_PORT=$3
    cp -r spark spark-$SPARK_VERSION-bin-$NAME

    cd spark-$SPARK_VERSION-bin-$NAME

    # TODO There should probably be a flag to make-distribution to allow 2.10 support
    if [[ $FLAGS == *scala-2.10* ]]; then
      ./dev/change-scala-version.sh 2.10
    fi

    export ZINC_PORT=$ZINC_PORT
    echo "Creating distribution: $NAME ($FLAGS)"

    # Get maven home set by MVN
    MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'`

    ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \
      -DzincPort=$ZINC_PORT > ../binary-release-$NAME.log 2>&1
    cd ..
    cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz .

    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
      --output spark-$SPARK_VERSION-bin-$NAME.tgz.asc \
      --detach-sig spark-$SPARK_VERSION-bin-$NAME.tgz
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
      MD5 spark-$SPARK_VERSION-bin-$NAME.tgz > \
      spark-$SPARK_VERSION-bin-$NAME.tgz.md5
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
      SHA512 spark-$SPARK_VERSION-bin-$NAME.tgz > \
      spark-$SPARK_VERSION-bin-$NAME.tgz.sha
  }

  # TODO: Check exit codes of children here:
  # http://stackoverflow.com/questions/1570262/shell-get-exit-code-of-background-process
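  # One possible sketch for the TODO above (not wired in here): launch each build
  # with "&", record $! in an array, then wait on each PID individually and fail the
  # run if any child failed. The variable names below are illustrative only.
  #
  #   pids=()
  #   make_binary_release "some-name" "$FLAGS" "3033" & pids+=($!)
  #   for pid in "${pids[@]}"; do
  #     wait "$pid" || { echo "ERROR: a binary build failed"; exit 1; }
  #   done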
FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos" make_binary_release "hadoop2.3" "-Phadoop2.3 $FLAGS" "3033" & make_binary_release "hadoop2.4" "-Phadoop2.4 $FLAGS" "3034" & make_binary_release "hadoop2.6" "-Phadoop2.6 $FLAGS" "3035" & make_binary_release "hadoop2.7" "-Phadoop2.7 $FLAGS" "3036" & make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn -Pmesos" "3037" & make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" & wait rm -rf spark-$SPARK_VERSION-bin-*/ # Copy data dest_dir="$REMOTE_PARENT_DIR/${DEST_DIR_NAME}-bin" echo "Copying release tarballs to $dest_dir" # Put to new directory: LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" # Re-upload a second time and leave the files in the timestamped upload directory: LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' exit 0 fi if [[ "$1" == "docs" ]]; then # Documentation cd spark echo "Building Spark docs" dest_dir="$REMOTE_PARENT_DIR/${DEST_DIR_NAME}-docs" cd docs # Compile docs with Java 7 to use nicer format # TODO: Make configurable to add this: PRODUCTION=1 PRODUCTION=1 RELEASE_VERSION="$SPARK_VERSION" jekyll build echo "Copying release documentation to $dest_dir" # Put to new directory: LFTP mkdir -p $dest_dir LFTP mirror -R _site $dest_dir # Delete /latest directory and rename new upload to /latest LFTP "rm -r -f $REMOTE_PARENT_DIR/latest || exit 0" LFTP mv $dest_dir "$REMOTE_PARENT_DIR/latest" # Re-upload a second time and leave the files in the timestamped upload directory: LFTP mkdir -p $dest_dir LFTP mirror -R _site $dest_dir cd .. exit 0 fi if [[ "$1" == "publish-snapshot" ]]; then cd spark # Publish Spark to Maven release repo echo "Deploying Spark SNAPSHOT at '$GIT_REF' ($git_hash)" echo "Publish version is $SPARK_VERSION" if [[ ! $SPARK_VERSION == *"SNAPSHOT"* ]]; then echo "ERROR: Snapshots must have a version containing SNAPSHOT" echo "ERROR: You gave version '$SPARK_VERSION'" exit 1 fi # Coerce the requested version $MVN versions:set -DnewVersion=$SPARK_VERSION tmp_settings="tmp-settings.xml" echo "" > $tmp_settings echo "apache.snapshots.https$ASF_USERNAME" >> $tmp_settings echo "$ASF_PASSWORD" >> $tmp_settings echo "" >> $tmp_settings # Generate random point for Zinc export ZINC_PORT=$(python -S -c "import random; print random.randrange(3030,4030)") $MVN -DzincPort=$ZINC_PORT --settings $tmp_settings -DskipTests $PUBLISH_PROFILES deploy ./dev/change-scala-version.sh 2.10 $MVN -DzincPort=$ZINC_PORT -Dscala-2.10 --settings $tmp_settings \ -DskipTests $PUBLISH_PROFILES clean deploy # Clean-up Zinc nailgun process /usr/sbin/lsof -P |grep $ZINC_PORT | grep LISTEN | awk '{ print $2; }' | xargs kill rm $tmp_settings cd .. 
if [[ "$1" == "publish-release" ]]; then
  cd spark
  # Publish Spark to Maven release repo
  echo "Publishing Spark checkout at '$GIT_REF' ($git_hash)"
  echo "Publish version is $SPARK_VERSION"

  # Coerce the requested version
  $MVN versions:set -DnewVersion=$SPARK_VERSION

  # Using Nexus API documented here:
  # https://support.sonatype.com/entries/39720203-Uploading-to-a-Staging-Repository-via-REST-API
  echo "Creating Nexus staging repository"
  repo_request="<promoteRequest><data><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
  out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
    -H "Content-Type:application/xml" -v \
    $NEXUS_ROOT/profiles/$NEXUS_PROFILE/start)
  staged_repo_id=$(echo $out | sed -e "s/.*\(orgapachespark-[0-9]\{4\}\).*/\1/")
  echo "Created Nexus staging repository: $staged_repo_id"

  tmp_repo=$(mktemp -d spark-repo-XXXXX)

  # Generate random port for Zinc
  export ZINC_PORT=$(python -S -c "import random; print random.randrange(3030,4030)")

  $MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -DskipTests $PUBLISH_PROFILES clean install

  ./dev/change-scala-version.sh 2.10

  $MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -Dscala-2.10 \
    -DskipTests $PUBLISH_PROFILES clean install

  # Clean-up Zinc nailgun process
  /usr/sbin/lsof -P |grep $ZINC_PORT | grep LISTEN | awk '{ print $2; }' | xargs kill

  # Revert to the default Scala version for the remaining steps
  ./dev/change-scala-version.sh 2.11

  pushd $tmp_repo/org/apache/spark

  # Remove any extra files generated during install
  find . -type f |grep -v \.jar |grep -v \.pom | xargs rm

  echo "Creating hash and signature files"
  for file in $(find . -type f)
  do
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --output $file.asc \
      --detach-sig --armour $file;
    if [ $(command -v md5) ]; then
      # Available on OS X; -q to keep only hash
      md5 -q $file > $file.md5
    else
      # Available on Linux; cut to keep only hash
      md5sum $file | cut -f1 -d' ' > $file.md5
    fi
    sha1sum $file | cut -f1 -d' ' > $file.sha1
  done

  nexus_upload=$NEXUS_ROOT/deployByRepositoryId/$staged_repo_id
  echo "Uploading files to $nexus_upload"
  for file in $(find . -type f)
  do
    # strip leading ./
    file_short=$(echo $file | sed -e "s/\.\///")
    dest_url="$nexus_upload/org/apache/spark/$file_short"
    echo "  Uploading $file_short"
    curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
  done

  echo "Closing nexus staging repository"
  repo_request="<promoteRequest><data><stagedRepositoryId>$staged_repo_id</stagedRepositoryId><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
  out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
    -H "Content-Type:application/xml" -v \
    $NEXUS_ROOT/profiles/$NEXUS_PROFILE/finish)
  echo "Closed Nexus staging repository: $staged_repo_id"
  popd
  rm -rf $tmp_repo
  cd ..
  exit 0
fi

cd ..
rm -rf spark
echo "ERROR: expects to be called with 'package', 'docs', 'publish-release' or 'publish-snapshot'"
exit 1
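# After a publish-release run, a release manager might spot-check one of the staged
# artifacts before calling a vote. This is only an illustrative sketch; the jar name
# below is a placeholder for whatever artifact was actually staged:
#
#   gpg --verify some-artifact.jar.asc some-artifact.jar
#   [ "$(sha1sum some-artifact.jar | cut -f1 -d' ')" = "$(cat some-artifact.jar.sha1)" ] \
#     && echo "sha1 OK"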