aboutsummaryrefslogtreecommitdiff
path: root/build
diff options
context:
space:
mode:
authorBrennon York <brennon.york@capitalone.com>2014-12-27 13:25:18 -0800
committerPatrick Wendell <pwendell@gmail.com>2014-12-27 13:26:38 -0800
commita3e51cc990812c8099dcaf1f3bd6d5bae45cf8e6 (patch)
treeb44d82a54f89d9a976a0ebfd3f59657538ddadb8 /build
parent080ceb771a1e6b9f844cfd4f1baa01133c106888 (diff)
downloadspark-a3e51cc990812c8099dcaf1f3bd6d5bae45cf8e6.tar.gz
spark-a3e51cc990812c8099dcaf1f3bd6d5bae45cf8e6.tar.bz2
spark-a3e51cc990812c8099dcaf1f3bd6d5bae45cf8e6.zip
[SPARK-4501][Core] - Create build/mvn to automatically download maven/zinc/scalac
Creates a top level directory script (as `build/mvn`) to automatically download zinc and the specific version of scala used to easily build spark. This will also download and install maven if the user doesn't already have it and all packages are hosted under the `build/` directory. Tested on both Linux and OSX OS's and both work. All commands pass through to the maven binary so it acts exactly as a traditional maven call would. Author: Brennon York <brennon.york@capitalone.com> Closes #3707 from brennonyork/SPARK-4501 and squashes the following commits: 0e5a0e4 [Brennon York] minor incorrect doc verbage (with -> this) 9b79e38 [Brennon York] fixed merge conflicts with dev/run-tests, properly quoted args in sbt/sbt, fixed bug where relative paths would fail if passed in from build/mvn d2d41b6 [Brennon York] added blurb about leverging zinc with build/mvn b979c58 [Brennon York] updated the merge conflict c5634de [Brennon York] updated documentation to overview build/mvn, updated all points where sbt/sbt was referenced with build/sbt b8437ba [Brennon York] set progress bars for curl and wget when not run on jenkins, no progress bar when run on jenkins, moved sbt script to build/sbt, wrote stub and warning under sbt/sbt which calls build/sbt, modified build/sbt to use the correct directory, fixed bug in build/sbt-launch-lib.bash to correctly pull the sbt version be11317 [Brennon York] added switch to silence download progress only if AMPLAB_JENKINS is set 28d0a99 [Brennon York] updated to remove the python dependency, uses grep instead 7e785a6 [Brennon York] added silent and quiet flags to curl and wget respectively, added single echo output to denote start of a download if download is needed 14a5da0 [Brennon York] removed unnecessary zinc output on startup 1af4a94 [Brennon York] fixed bug with uppercase vs lowercase variable 3e8b9b3 [Brennon York] updated to properly only restart zinc if it was freshly installed a680d12 [Brennon York] Added comments to functions and 
tested various mvn calls bb8cc9d [Brennon York] removed package files ef017e6 [Brennon York] removed OS complexities, setup generic install_app call, removed extra file complexities, removed help, removed forced install (defaults now), removed double-dash from cli 07bf018 [Brennon York] Updated to specifically handle pulling down the correct scala version f914dea [Brennon York] Beginning final portions of localized scala home 69c4e44 [Brennon York] working linux and osx installers for purely local mvn build 4a1609c [Brennon York] finalizing working linux install for maven to local ./build/apache-maven folder cbfcc68 [Brennon York] Changed the default sbt/sbt to build/sbt and added a build/mvn which will automatically download, install, and execute maven with zinc for easier build capability
Diffstat (limited to 'build')
-rwxr-xr-xbuild/mvn132
-rwxr-xr-xbuild/sbt111
-rwxr-xr-xbuild/sbt-launch-lib.bash188
3 files changed, 431 insertions, 0 deletions
diff --git a/build/mvn b/build/mvn
new file mode 100755
index 0000000000..dde486a8ac
--- /dev/null
+++ b/build/mvn
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+
+# Determine the directory that contains this script (the build/ folder)
+_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# Preserve the calling directory
+_CALLING_DIR="$(pwd)"
+
+# Installs any application tarball given a URL, the expected tarball name,
+# and, optionally, a checkable binary path to determine if the binary has
+# already been installed
+## Arg1 - URL
+## Arg2 - Tarball Name
+## Arg3 - Checkable Binary
+install_app() {
+  # Downloads and unpacks an application tarball under ${_DIR}.
+  #   $1 - base URL the tarball is served from
+  #   $2 - tarball file name (appended to $1, and used as the local file name)
+  #   $3 - optional path, relative to ${_DIR}, of a binary whose presence
+  #        means the application is already installed
+  # Exits the script with status 2 when the tarball cannot be downloaded or
+  # extracted. Leaves the working directory set to ${_DIR} after extracting.
+  local remote_tarball="$1/$2"
+  local local_tarball="${_DIR}/$2"
+  local binary="${_DIR}/$3"
+  # Downloads land in a side file first so that a failed transfer can never
+  # be mistaken for a complete tarball.
+  local partial_tarball="${local_tarball}.part"
+
+  # Show progress bars by default, but stay silent when running on Jenkins
+  local curl_opts="--progress-bar"
+  local wget_opts="--progress=bar:force"
+  if [ -n "$AMPLAB_JENKINS" ]; then
+    curl_opts="-s"
+    wget_opts="--quiet"
+  fi
+
+  # Nothing to do when a checkable binary was named and it already exists
+  if [ -n "$3" ] && [ -f "${binary}" ]; then
+    return 0
+  fi
+
+  # First choice: curl. --fail turns HTTP errors into a non-zero exit status
+  # instead of saving the server's error page as the tarball.
+  if [ ! -f "${local_tarball}" ] && command -v curl >/dev/null 2>&1; then
+    echo "exec: curl ${curl_opts} ${remote_tarball}"
+    if curl --fail ${curl_opts} "${remote_tarball}" > "${partial_tarball}"; then
+      mv -- "${partial_tarball}" "${local_tarball}"
+    else
+      rm -f -- "${partial_tarball}"
+    fi
+  fi
+
+  # Second choice: wget, only when the tarball is still missing
+  if [ ! -f "${local_tarball}" ] && command -v wget >/dev/null 2>&1; then
+    echo "exec: wget ${wget_opts} ${remote_tarball}"
+    if wget ${wget_opts} -O "${partial_tarball}" "${remote_tarball}"; then
+      mv -- "${partial_tarball}" "${local_tarball}"
+    else
+      rm -f -- "${partial_tarball}"
+    fi
+  fi
+
+  # If both were unsuccessful, give up
+  if [ ! -f "${local_tarball}" ]; then
+    echo -n "ERROR: Cannot download $2 with cURL or wget; " >&2
+    echo "please install manually and try again." >&2
+    exit 2
+  fi
+
+  # Unpack next to this script; a tarball that cannot be unpacked is removed
+  # so the next run downloads a fresh copy.
+  if ! cd "${_DIR}" || ! tar -xzf "$2"; then
+    rm -f -- "${local_tarball}"
+    echo "ERROR: Cannot extract $2; please install manually and try again." >&2
+    exit 2
+  fi
+  rm -f -- "${local_tarball}"
+}
+
+# Install maven under the build/ folder
+install_mvn() {
+  # Fetches Maven 3.2.3 through install_app and records the resulting
+  # launcher path in MVN_BIN.
+  local mvn_release="apache-maven-3.2.3"
+  local mvn_launcher="${mvn_release}/bin/mvn"
+
+  install_app \
+    "http://apache.claz.org/maven/maven-3/3.2.3/binaries" \
+    "${mvn_release}-bin.tar.gz" \
+    "${mvn_launcher}"
+
+  MVN_BIN="${_DIR}/${mvn_launcher}"
+}
+
+# Install zinc under the build/ folder
+install_zinc() {
+ local zinc_path="zinc-0.3.5.3/bin/zinc"
+ [ ! -f "${zinc_path}" ] && ZINC_INSTALL_FLAG=1
+ install_app \
+ "http://downloads.typesafe.com/zinc/0.3.5.3" \
+ "zinc-0.3.5.3.tgz" \
+ "${zinc_path}"
+ ZINC_BIN="${_DIR}/${zinc_path}"
+}
+
+# Determine the Scala version from the root pom.xml file, set the Scala URL,
+# and, with that, download the specific version of Scala necessary under
+# the build/ folder
+install_scala() {
+ # determine the Scala version used in Spark
+ local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | \
+ head -1 | cut -f2 -d'>' | cut -f1 -d'<'`
+ local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
+
+ install_app \
+ "http://downloads.typesafe.com/scala/${scala_version}" \
+ "scala-${scala_version}.tgz" \
+ "scala-${scala_version}/bin/scala"
+
+ SCALA_COMPILER="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-compiler.jar"
+ SCALA_LIBRARY="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-library.jar"
+}
+
+# Determines if a given application is already installed. If not, will attempt
+# to install
+## Arg1 - application name
+## Arg2 - Alternate path to local install under build/ dir
+check_and_install_app() {
+  # Looks for executable $1 on PATH and stores its path in the global
+  # variable <UPPERCASED $1>_BIN (for example MVN_BIN for "mvn"). When the
+  # executable is not found, calls install_$1, which is expected to set that
+  # variable itself.
+  local app_bin_var
+  app_bin_var="$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]')_BIN"
+
+  # type -P performs a PATH-only lookup and prints the file path (or nothing)
+  local found_path
+  found_path="$(type -P "$1" 2>/dev/null)"
+
+  # printf -v assigns by variable name without eval, so a path containing
+  # spaces or shell metacharacters is stored verbatim instead of being
+  # re-parsed as shell code.
+  printf -v "${app_bin_var}" '%s' "${found_path}"
+
+  if [ -z "${found_path}" ]; then
+    "install_$1"
+  fi
+}
+
+# Setup healthy defaults for the Zinc port if none were provided from
+# the environment
+ZINC_PORT=${ZINC_PORT:-"3030"}
+
+# Check and install all applications necessary to build Spark
+check_and_install_app "mvn"
+
+# Install the proper version of Scala and Zinc for the build
+install_zinc
+install_scala
+
+# Reset the current working directory
+cd "${_CALLING_DIR}"
+
+# Now that zinc is ensured to be installed, check its status and, if its
+# not running or just installed, start it
+if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`${ZINC_BIN} -status`" ]; then
+ ${ZINC_BIN} -shutdown
+ ${ZINC_BIN} -start -port ${ZINC_PORT} \
+ -scala-compiler "${SCALA_COMPILER}" \
+ -scala-library "${SCALA_LIBRARY}" &>/dev/null
+fi
+
+# Set any `mvn` options if not already present
+export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}
+
+# Last, call the `mvn` command as usual
+${MVN_BIN} "$@"
diff --git a/build/sbt b/build/sbt
new file mode 100755
index 0000000000..0a251d97db
--- /dev/null
+++ b/build/sbt
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+
+# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
+# that we can run Hive to generate the golden answer. This is not required for normal development
+# or testing.
+for i in "$HIVE_HOME"/lib/*
+do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i"
+done
+export HADOOP_CLASSPATH
+
+# Prints the physical path of $1, following a chain of up to 100 symbolic
+# links on the final path component. Runs in a subshell so the directory
+# changes do not affect the caller.
+realpath () {
+(
+  TARGET_FILE="$1"
+
+  cd "$(dirname "$TARGET_FILE")"
+  TARGET_FILE="$(basename "$TARGET_FILE")"
+
+  COUNT=0
+  while [[ -L "$TARGET_FILE" && $COUNT -lt 100 ]]; do
+    TARGET_FILE="$(readlink "$TARGET_FILE")"
+    # Quoted so link targets containing spaces resolve correctly
+    cd "$(dirname "$TARGET_FILE")"
+    TARGET_FILE="$(basename "$TARGET_FILE")"
+    COUNT=$((COUNT + 1))
+  done
+
+  printf '%s\n' "$(pwd -P)/$TARGET_FILE"
+)
+}
+
+. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash
+
+
+declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
+declare -r sbt_opts_file=".sbtopts"
+declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
+
+# Prints the help text to stdout. The heredoc delimiter is unquoted, so
+# $script_name, $sbt_mem, $java_opts, $default_sbt_opts and the embedded
+# $(get_mem_opts ...) and $(java -version ...) commands are expanded each
+# time the function is called.
+usage() {
+ cat <<EOM
+Usage: $script_name [options]
+
+ -h | -help print this message
+ -v | -verbose this runner is chattier
+ -d | -debug set sbt log level to debug
+ -no-colors disable ANSI color codes
+ -sbt-create start sbt even if current directory contains no sbt project
+ -sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
+ -sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
+ -ivy <path> path to local Ivy repository (default: ~/.ivy2)
+ -mem <integer> set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
+ -no-share use all local caches; no sharing
+ -no-global uses global caches, but does not use global ~/.sbt directory.
+ -jvm-debug <port> Turn on JVM debugging, open at the given port.
+ -batch Disable interactive mode
+
+ # sbt version (default: from project/build.properties if present, else latest release)
+ -sbt-version <version> use the specified version of sbt
+ -sbt-jar <path> use the specified jar as the sbt launcher
+ -sbt-rc use an RC version of sbt
+ -sbt-snapshot use a snapshot version of sbt
+
+ # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
+ -java-home <path> alternate JAVA_HOME
+
+ # jvm options and output control
+ JAVA_OPTS environment variable, if unset uses "$java_opts"
+ SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
+ .sbtopts if this file exists in the current directory, it is
+ prepended to the runner args
+ /etc/sbt/sbtopts if this file exists, it is prepended to the runner args
+ -Dkey=val pass -Dkey=val directly to the java runtime
+ -J-X pass option -X directly to the java runtime
+ (-J is stripped)
+ -S-X add -X to sbt's scalacOptions (-S is stripped)
+ -PmavenProfiles Enable a maven profile for the build.
+
+In the case of duplicated or conflicting options, the order above
+shows precedence: JAVA_OPTS lowest, command line options highest.
+EOM
+}
+
+process_my_args () {
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
+ -no-share) addJava "$noshare_opts" && shift ;;
+ -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
+ -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
+ -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
+ -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
+ -batch) exec </dev/null && shift ;;
+
+ -sbt-create) sbt_create=true && shift ;;
+
+ *) addResidual "$1" && shift ;;
+ esac
+ done
+
+ # Now, ensure sbt version is used.
+ [[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
+}
+
+# Prints the contents of the options file $1 with every line that starts
+# with '#' removed. sed reads the file directly, so an unreadable file gives
+# a non-zero exit status instead of being hidden behind a pipeline.
+loadConfigFile() {
+  sed '/^\#/d' "$1"
+}
+
+# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
+[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
+[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"
+
+run "$@"
diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash
new file mode 100755
index 0000000000..f5df439eff
--- /dev/null
+++ b/build/sbt-launch-lib.bash
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+#
+
+# A library to simplify using the SBT launcher from other packages.
+# Note: This should be used by tools like giter8/conscript etc.
+
+# TODO - Should we merge the main SBT script with this library?
+
+# Choose the read-only script_dir: $HOME/.sbt when HOME is set, otherwise the
+# directory of $script_path.
+# NOTE(review): script_path is not assigned anywhere in this file - confirm
+# the sourcing script provides it.
+if test -z "$HOME"; then
+ declare -r script_dir="$(dirname "$script_path")"
+else
+ declare -r script_dir="$HOME/.sbt"
+fi
+
+# Argument buckets filled by the add*/enableProfile helpers below
+declare -a residual_args
+declare -a java_args
+declare -a scalac_args
+declare -a sbt_commands
+declare -a maven_profiles
+
+# Prefer the java binary under JAVA_HOME when it is executable; otherwise
+# fall back to whatever `java` resolves to on PATH.
+if test -x "$JAVA_HOME/bin/java"; then
+ echo -e "Using $JAVA_HOME as default JAVA_HOME."
+ echo "Note, this will be overridden by -java-home if it is set."
+ declare java_cmd="$JAVA_HOME/bin/java"
+else
+ declare java_cmd=java
+fi
+
+# Writes its arguments to stderr, echo-style.
+echoerr () {
+  echo "$@" >&2
+}
+vlog () {
+ [[ $verbose || $debug ]] && echoerr "$@"
+}
+dlog () {
+ [[ $debug ]] && echoerr "$@"
+}
+
+# Makes sure the sbt launcher jar for the version named in
+# ./project/build.properties exists at build/sbt-launch-<version>.jar and
+# stores that path in $sbt_jar. Tries two download URLs with curl, or with
+# wget when curl is unavailable. Exits the shell with a non-zero status when
+# neither tool is installed or the download fails. Paths are relative to the
+# current working directory.
+acquire_sbt_jar () {
+  SBT_VERSION="$(awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties)"
+  URL1="http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar"
+  URL2="http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar"
+  JAR="build/sbt-launch-${SBT_VERSION}.jar"
+
+  sbt_jar="${JAR}"
+
+  # Already downloaded: nothing to do
+  if [[ -f "${sbt_jar}" ]]; then
+    return 0
+  fi
+
+  printf "Attempting to fetch sbt\n"
+  # Download into a side file and only rename it on success, so a broken
+  # transfer is never picked up as the launcher.
+  JAR_DL="${JAR}.part"
+  if command -v curl >/dev/null 2>&1; then
+    # --fail makes curl return non-zero on HTTP errors instead of saving the
+    # server's error page as the jar
+    (curl --fail --silent "${URL1}" > "${JAR_DL}" || curl --fail --silent "${URL2}" > "${JAR_DL}") && mv "${JAR_DL}" "${JAR}"
+  elif command -v wget >/dev/null 2>&1; then
+    (wget --quiet "${URL1}" -O "${JAR_DL}" || wget --quiet "${URL2}" -O "${JAR_DL}") && mv "${JAR_DL}" "${JAR}"
+  else
+    printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n"
+    exit -1
+  fi
+
+  if [[ ! -f "${JAR}" ]]; then
+    # We failed to download; do not leave the partial file behind
+    rm -f -- "${JAR_DL}"
+    printf 'Our attempt to download sbt locally to %s failed. Please install sbt manually from http://www.scala-sbt.org/\n' "${JAR}"
+    exit -1
+  fi
+  printf 'Launching sbt from %s\n' "${JAR}"
+}
+
+execRunner () {
+ # print the arguments one to a line, quoting any containing spaces
+ [[ $verbose || $debug ]] && echo "# Executing command line:" && {
+ for arg; do
+ if printf "%s\n" "$arg" | grep -q ' '; then
+ printf "\"%s\"\n" "$arg"
+ else
+ printf "%s\n" "$arg"
+ fi
+ done
+ echo ""
+ }
+
+ exec "$@"
+}
+
+# Appends $1 as one element to the java_args array.
+addJava () {
+  dlog "[addJava] arg = '$1'"
+  java_args+=( "$1" )
+}
+
+# Appends $1 to the maven_profiles array and re-exports the whole list,
+# space separated, as SBT_MAVEN_PROFILES.
+enableProfile () {
+  dlog "[enableProfile] arg = '$1'"
+  maven_profiles+=( "$1" )
+  SBT_MAVEN_PROFILES="${maven_profiles[@]}"
+  export SBT_MAVEN_PROFILES
+}
+
+# Appends $1 as one element to the sbt_commands array.
+addSbt () {
+  dlog "[addSbt] arg = '$1'"
+  sbt_commands+=( "$1" )
+}
+# Appends $1 as one element to the residual_args array.
+addResidual () {
+  dlog "[residual] arg = '$1'"
+  residual_args+=( "$1" )
+}
+# Adds the JDWP agent option to the JVM arguments, using $1 as the address.
+addDebugger () {
+  local jdwp_address="$1"
+  addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=${jdwp_address}"
+}
+
+# A blunt attempt to scale the related JVM memory settings together
+# so they need not be tuned individually.
+# Prints JVM memory flags derived from a heap size in megabytes ($1,
+# default 2048): heap min/max equal to $1, MaxPermSize a quarter of the heap
+# clamped to the range 256..4096, and the code cache half of MaxPermSize.
+get_mem_opts () {
+  local heap_mb=${1:-2048}
+  local perm_mb=$(( $heap_mb / 4 ))
+  if (( $perm_mb <= 256 )); then
+    perm_mb=256
+  elif (( $perm_mb >= 4096 )); then
+    perm_mb=4096
+  fi
+  local cache_mb=$(( $perm_mb / 2 ))
+
+  echo "-Xms${heap_mb}m -Xmx${heap_mb}m -XX:MaxPermSize=${perm_mb}m -XX:ReservedCodeCacheSize=${cache_mb}m"
+}
+
+require_arg () {
+ local type="$1"
+ local opt="$2"
+ local arg="$3"
+ if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then
+ echo "$opt requires <$type> argument" 1>&2
+ exit 1
+ fi
+}
+
+# Succeeds when a shell function named $1 exists; prints nothing.
+is_function_defined() {
+  declare -F "$1" > /dev/null
+}
+
+process_args () {
+ while [[ $# -gt 0 ]]; do
+ case "$1" in
+ -h|-help) usage; exit 1 ;;
+ -v|-verbose) verbose=1 && shift ;;
+ -d|-debug) debug=1 && shift ;;
+
+ -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
+ -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
+ -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;;
+ -batch) exec </dev/null && shift ;;
+
+ -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
+ -sbt-version) require_arg version "$1" "$2" && sbt_version="$2" && shift 2 ;;
+ -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME=$2 && shift 2 ;;
+
+ -D*) addJava "$1" && shift ;;
+ -J*) addJava "${1:2}" && shift ;;
+ -P*) enableProfile "$1" && shift ;;
+ *) addResidual "$1" && shift ;;
+ esac
+ done
+
+ is_function_defined process_my_args && {
+ myargs=("${residual_args[@]}")
+ residual_args=()
+ process_my_args "${myargs[@]}"
+ }
+}
+
+# Entry point used by build/sbt: makes sure a launcher jar is available,
+# parses the arguments, then replaces the shell with the JVM running sbt.
+run() {
+ # no jar? download it.
+ # NOTE(review): "$sbt_version" is passed here, but acquire_sbt_jar reads the
+ # version from project/build.properties and never looks at its argument.
+ # NOTE(review): this runs before process_args, so a -sbt-jar given on the
+ # command line has not been stored in $sbt_jar yet at this point.
+ [[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
+ # still no jar? uh-oh.
+ echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"
+ exit 1
+ }
+
+ # process the combined args, then reset "$@" to the residuals
+ process_args "$@"
+ set -- "${residual_args[@]}"
+ argumentCount=$#
+
+ # run sbt
+ # SBT_OPTS, the get_mem_opts output, java_opts and java_args are expanded
+ # unquoted, so each is split on whitespace into separate JVM arguments.
+ # The -no-share option in build/sbt depends on this: it stores several -D
+ # flags in a single java_args element.
+ execRunner "$java_cmd" \
+ ${SBT_OPTS:-$default_sbt_opts} \
+ $(get_mem_opts $sbt_mem) \
+ ${java_opts} \
+ ${java_args[@]} \
+ -jar "$sbt_jar" \
+ "${sbt_commands[@]}" \
+ "${residual_args[@]}"
+}