#!/usr/bin/env bash

# Absolute path of the directory that contains this script. Downloaded
# tarballs and the unpacked tools are placed under it.
_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Preserve the calling directory; the install helpers change into ${_DIR}
# and the script switches back to this directory before running the build.
_CALLING_DIR="$(pwd)"
# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
# already been installed.
#
# Globals:
#   _DIR            (read) directory the tarball is downloaded to and
#                   unpacked in
#   AMPLAB_JENKINS  (read) when non-empty, downloads run silently
# Arguments:
## Arg1 - URL
## Arg2 - Tarball Name
## Arg3 - Checkable Binary (path relative to ${_DIR}; optional)
# Side effects:
#   Changes the working directory to ${_DIR} when it unpacks a tarball.
#   Terminates the shell with status 2 when the tarball cannot be downloaded.
install_app() {
  local remote_tarball="$1/$2"
  local local_tarball="${_DIR}/$2"
  local binary="${_DIR}/$3"

  # setup `curl` and `wget` silent options if we're running on Jenkins
  local curl_opts="--progress-bar"
  local wget_opts="--progress=bar:force"
  if [ -n "${AMPLAB_JENKINS}" ]; then
    curl_opts="-s"
    wget_opts="--quiet"
  fi

  # Nothing to do when the checkable binary is already in place.
  if [ -n "$3" ] && [ -f "${binary}" ]; then
    return 0
  fi

  # Download with curl unless the tarball is already here. `-f` makes HTTP
  # errors fail instead of saving the error page, and a failed transfer must
  # not leave a partial file behind: the checks below key off the file's
  # existence.
  if [ ! -f "${local_tarball}" ] && command -v curl >/dev/null 2>&1; then
    echo "exec: curl ${curl_opts} ${remote_tarball}"
    curl -f ${curl_opts} "${remote_tarball}" > "${local_tarball}" \
      || rm -f "${local_tarball}"
  fi

  # if the file still doesn't exist, lets try `wget` and cross our fingers
  if [ ! -f "${local_tarball}" ] && command -v wget >/dev/null 2>&1; then
    echo "exec: wget ${wget_opts} ${remote_tarball}"
    wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}" \
      || rm -f "${local_tarball}"
  fi

  # if both were unsuccessful, exit
  if [ ! -f "${local_tarball}" ]; then
    echo -n "ERROR: Cannot download $2 with cURL or wget; " >&2
    echo "please install manually and try again." >&2
    exit 2
  fi

  cd "${_DIR}" && tar -xzf "$2"
  rm -rf "${local_tarball}"
}
# Install maven under the build/ folder
#
# Delegates the download to install_app and publishes the resulting binary
# path in the global MVN_BIN.
install_mvn() {
  local mvn_version="3.2.3"
  # Mirrors only carry current releases; superseded versions such as this
  # one are served from the Apache archive.
  install_app \
    "https://archive.apache.org/dist/maven/maven-3/${mvn_version}/binaries" \
    "apache-maven-${mvn_version}-bin.tar.gz" \
    "apache-maven-${mvn_version}/bin/mvn"
  MVN_BIN="${_DIR}/apache-maven-${mvn_version}/bin/mvn"
}
# Install zinc under the build/ folder
#
# Sets ZINC_INSTALL_FLAG=1 when zinc was not yet present (so the caller knows
# the server must be (re)started) and publishes the binary path in ZINC_BIN.
install_zinc() {
  # NOTE(review): the version literal was missing from this file (paths read
  # "zinc-" with nothing after the dash). 0.3.5.3 is assumed here; confirm
  # against the release this build is expected to use.
  local zinc_version="0.3.5.3"
  local zinc_path="zinc-${zinc_version}/bin/zinc"

  # Look under ${_DIR}, where install_app unpacks, rather than relative to
  # whatever directory the caller happens to be in.
  if [ ! -f "${_DIR}/${zinc_path}" ]; then
    ZINC_INSTALL_FLAG=1
  fi

  install_app \
    "http://downloads.typesafe.com/zinc/${zinc_version}" \
    "zinc-${zinc_version}.tgz" \
    "${zinc_path}"
  ZINC_BIN="${_DIR}/${zinc_path}"
}
# Determine the Scala version from the root pom.xml file, set the Scala URL,
# and, with that, download the specific version of Scala necessary under
# the build/ folder
#
# Publishes the compiler and library jar paths in the globals SCALA_COMPILER
# and SCALA_LIBRARY.
install_scala() {
  # determine the Scala version used in Spark: text between '>' and '<' on
  # the first pom.xml line that mentions scala.version
  local scala_version
  scala_version="$(grep "scala.version" "${_DIR}/../pom.xml" \
    | head -1 | cut -f2 -d'>' | cut -f1 -d'<')"
  local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"

  install_app \
    "http://downloads.typesafe.com/scala/${scala_version}" \
    "scala-${scala_version}.tgz" \
    "scala-${scala_version}/bin/scala"

  # Resolve the lib/ directory that sits next to bin/ to an absolute path.
  local scala_lib
  scala_lib="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)"
  SCALA_COMPILER="${scala_lib}/scala-compiler.jar"
  SCALA_LIBRARY="${scala_lib}/scala-library.jar"
}
# Determines if a given application is already installed. If not, will attempt
# to install
#
# Sets the global <APP>_BIN (application name upper-cased, e.g. MVN_BIN) to
# the executable found on PATH, or to the empty string when none is found, in
# which case the matching install_<app> function is called.
## Arg1 - application name
## Arg2 - Alternate path to local install under build/ dir
check_and_install_app() {
  # create the local environment variable in uppercase
  local app_bin
  app_bin="$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]')_BIN"

  # `type -P` only reports executables on PATH (never functions or aliases).
  local app_path
  app_path="$(type -P "$1" 2>/dev/null)"

  # set the generated app variable (i.e. MVN_BIN) without `eval`, so paths
  # containing spaces or shell metacharacters are stored verbatim
  printf -v "${app_bin}" '%s' "${app_path}"

  if [ -z "${app_path}" ]; then
    "install_$1"
  fi
}
# Setup healthy defaults for the Zinc port if none were provided from
# the environment
# NOTE(review): the default assignment was missing from this file; 3030 is
# assumed here as the default port. Confirm.
ZINC_PORT=${ZINC_PORT:-"3030"}

# Remember where we were invoked from: the installers below change into the
# build directory. Nothing has changed directory before this point.
_CALLING_DIR="${_CALLING_DIR:-$(pwd)}"

# Check and install all applications necessary to build Spark
check_and_install_app "mvn"

# Install the proper version of Scala and Zinc for the build
# NOTE(review): these two calls were missing from this file; they are the
# only places that set ZINC_BIN, SCALA_COMPILER and SCALA_LIBRARY used below.
install_scala
install_zinc

# Reset the current working directory
cd "${_CALLING_DIR}"

# Now that zinc is ensured to be installed, check its status and, if its
# not running or just installed, start it
if [ -n "${ZINC_INSTALL_FLAG}" ] || [ -z "$("${ZINC_BIN}" -status)" ]; then
  "${ZINC_BIN}" -shutdown
  "${ZINC_BIN}" -start -port "${ZINC_PORT}" \
    -scala-compiler "${SCALA_COMPILER}" \
    -scala-library "${SCALA_LIBRARY}" &>/dev/null
fi

# Set any `mvn` options if not already present
export MAVEN_OPTS="${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}"

# Last, call the `mvn` command as usual
"${MVN_BIN}" "$@"
+#!/usr/bin/env bash
+# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
+# that we can run Hive to generate the golden answer. This is not required for normal development
+# or testing.
+for i in "$HIVE_HOME"/lib/*
# Prints the physical absolute path of a file, following a chain of symbolic
# links (at most 100 hops).
#
# Arguments: $1 - path to resolve
# Outputs:   the resolved path on stdout
#
# The body runs in a subshell so the directory changes used to resolve the
# path, and the working variables, never leak into the caller.
realpath() {
  (
    target_file="$1"

    cd "$(dirname "${target_file}")" || exit 1
    target_file="$(basename "${target_file}")"

    # Follow the link chain, hopping into each link target's directory so
    # relative link targets resolve correctly.
    count=0
    while [ -L "${target_file}" ] && [ "${count}" -lt 100 ]; do
      target_file="$(readlink "${target_file}")"
      cd "$(dirname "${target_file}")" || exit 1
      target_file="$(basename "${target_file}")"
      count=$((count + 1))
    done

    echo "$(pwd -P)/${target_file}"
  )
}
# Load the launcher library that sits next to this script (symlinks to the
# script are resolved first).
. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash

# Java system properties applied by -no-share: keep the sbt base, boot and
# ivy directories inside the project.
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
# Per-project and system-wide option files whose contents are prepended to
# the runner arguments.
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
# Prints the runner's help text to stdout.
#
# Reads $script_name, $sbt_mem, $java_opts and $default_sbt_opts, and calls
# get_mem_opts and `java -version` to show the current defaults.
usage() {
  cat <<EOM
Usage: $script_name [options]

  -h | -help         print this message
  -v | -verbose      this runner is chattier
  -d | -debug        set sbt log level to debug
  -no-colors         disable ANSI color codes
  -sbt-create        start sbt even if current directory contains no sbt project
  -sbt-dir   <path>  path to global settings/plugins directory (default: ~/.sbt)
  -sbt-boot  <path>  path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
  -ivy       <path>  path to local Ivy repository (default: ~/.ivy2)
  -mem    <integer>  set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
  -no-share          use all local caches; no sharing
  -no-global         uses global caches, but does not use global ~/.sbt directory.
  -jvm-debug <port>  Turn on JVM debugging, open at the given port.
  -batch             Disable interactive mode

  # sbt version (default: from project/build.properties if present, else latest release)
  -sbt-version  <version>   use the specified version of sbt
  -sbt-jar      <path>      use the specified jar as the sbt launcher
  -sbt-rc                   use an RC version of sbt
  -sbt-snapshot             use a snapshot version of sbt

  # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
  -java-home <path>         alternate JAVA_HOME

  # jvm options and output control
  JAVA_OPTS          environment variable, if unset uses "$java_opts"
  SBT_OPTS           environment variable, if unset uses "$default_sbt_opts"
  .sbtopts           if this file exists in the current directory, it is
                     prepended to the runner args
  /etc/sbt/sbtopts   if this file exists, it is prepended to the runner args
  -Dkey=val          pass -Dkey=val directly to the java runtime
  -J-X               pass option -X directly to the java runtime
                     (-J is stripped)
  -S-X               add -X to sbt's scalacOptions (-S is stripped)
  -PmavenProfiles    Enable a maven profile for the build.

In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}
# Handles the options specific to this runner. process_args in the launcher
# library calls it with the arguments it did not recognise itself.
#
# Recognised options are turned into java arguments (addJava) or flags;
# everything else is passed on via addResidual.
process_my_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
      -no-share) addJava "$noshare_opts" && shift ;;
      -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
      -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
      -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
      -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
      # detach stdin so sbt cannot prompt
      -batch) exec </dev/null && shift ;;
      -sbt-create) sbt_create=true && shift ;;
      *) addResidual "$1" && shift ;;
    esac
  done

  # Now, ensure sbt version is used.
  # (an `if` rather than `[[ ]] &&` so the function returns 0 when no version
  # was requested)
  if [[ -n "${sbt_version}" ]]; then
    addJava "-Dsbt.version=$sbt_version"
  fi
}
# Prints the contents of the option file $1 with every line that starts
# with '#' removed.
loadConfigFile() {
  sed '/^\#/d' "$1"
}
# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
# The command substitutions are left unquoted on purpose: the file contents
# must be split into separate arguments.
if [[ -f "$etc_sbt_opts_file" ]]; then
  set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
fi
if [[ -f "$sbt_opts_file" ]]; then
  set -- $(loadConfigFile "$sbt_opts_file") "$@"
fi

run "$@"
#!/usr/bin/env bash

# A library to simplify using the SBT launcher from other packages.
# Note: This should be used by tools like giter8/conscript etc.

# TODO - Should we merge the main SBT script with this library?

# Use $HOME/.sbt as the script directory, falling back to the directory of
# $script_path when HOME is not set.
if test -z "$HOME"; then
  declare -r script_dir="$(dirname "$script_path")"
else
  declare -r script_dir="$HOME/.sbt"
fi

# Argument accumulators filled in while the command line is processed.
declare -a residual_args
declare -a java_args
declare -a scalac_args
declare -a sbt_commands
declare -a maven_profiles

# Prefer the java binary from JAVA_HOME when it is executable, otherwise rely
# on `java` from PATH.
if test -x "$JAVA_HOME/bin/java"; then
  echo -e "Using $JAVA_HOME as default JAVA_HOME."
  echo "Note, this will be overridden by -java-home if it is set."
  declare java_cmd="$JAVA_HOME/bin/java"
else
  declare java_cmd=java
fi
# Prints its arguments to stderr.
echoerr() {
  echo 1>&2 "$@"
}
# Logs its arguments to stderr when $verbose or $debug is set; otherwise it
# does nothing and returns success.
vlog() {
  if [[ $verbose || $debug ]]; then
    echoerr "$@"
  fi
}
# Logs its arguments to stderr when $debug is set; otherwise it does nothing
# and returns success.
dlog() {
  if [[ $debug ]]; then
    echoerr "$@"
  fi
}
# Makes sure the sbt launcher jar for the project's sbt version exists
# locally, downloading it when necessary.
#
# Reads the version from ./project/build.properties and sets the globals
# SBT_VERSION, URL1, URL2, JAR and sbt_jar (plus JAR_DL when downloading).
# Terminates the shell with a non-zero status when the jar cannot be obtained.
acquire_sbt_jar() {
  SBT_VERSION=$(awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties)
  URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
  URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
  JAR=build/sbt-launch-${SBT_VERSION}.jar

  sbt_jar=$JAR

  if [[ ! -f "$sbt_jar" ]]; then
    # Download sbt launch jar if it hasn't been downloaded yet
    printf 'Attempting to fetch sbt\n'
    JAR_DL="${JAR}.part"
    mkdir -p "$(dirname "${JAR}")"
    # Download to a .part file and rename only on success. --fail makes an
    # HTTP error count as a failure (so the second URL is tried) instead of
    # saving the error page as the jar; --location follows redirects.
    if hash curl 2>/dev/null; then
      (curl --fail --location --silent "${URL1}" > "${JAR_DL}" \
        || curl --fail --location --silent "${URL2}" > "${JAR_DL}") \
        && mv "${JAR_DL}" "${JAR}"
    elif hash wget 2>/dev/null; then
      (wget --quiet "${URL1}" -O "${JAR_DL}" \
        || wget --quiet "${URL2}" -O "${JAR_DL}") \
        && mv "${JAR_DL}" "${JAR}"
    else
      printf 'You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n'
      exit 1
    fi

    if [ ! -f "${JAR}" ]; then
      # We failed to download
      printf 'Our attempt to download sbt locally to %s failed. Please install sbt manually from http://www.scala-sbt.org/\n' "${JAR}"
      exit 1
    fi
    printf 'Launching sbt from %s\n' "${JAR}"
  fi
}
# Replaces the current shell with the given command line. When $verbose or
# $debug is set, the command line is echoed to stdout first.
execRunner() {
  if [[ $verbose || $debug ]]; then
    echo "# Executing command line:"
    # print the arguments one to a line, quoting any containing spaces
    local arg
    for arg; do
      case "$arg" in
        *' '*) printf "\"%s\"\n" "$arg" ;;
        *) printf "%s\n" "$arg" ;;
      esac
    done
    echo ""
  fi

  exec "$@"
}
# Appends $1 as one element to the java_args array.
addJava() {
  dlog "[addJava] arg = '$1'"
  java_args+=("$1")
}
# Appends $1 to the maven_profiles array and re-exports the whole list,
# space-separated, as SBT_MAVEN_PROFILES.
enableProfile() {
  dlog "[enableProfile] arg = '$1'"
  maven_profiles+=("$1")
  # join with a single space whatever the caller's IFS is
  local IFS=' '
  export SBT_MAVEN_PROFILES="${maven_profiles[*]}"
}
# Appends $1 as one element to the sbt_commands array.
addSbt() {
  dlog "[addSbt] arg = '$1'"
  sbt_commands+=("$1")
}
# Appends $1 as one element to the residual_args array (arguments that no
# option handler consumed).
addResidual() {
  dlog "[residual] arg = '$1'"
  residual_args+=("$1")
}
# Adds the JDWP agent option to the java arguments so a debugger can attach
# on port $1 (socket transport, server mode, no suspend on start).
addDebugger() {
  addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1"
}
# Derives a consistent set of JVM memory options from a single heap size so
# the individual settings need not be tuned separately.
#
# Arguments: $1 - heap size in megabytes (default: 2048)
# Outputs:   "-Xms.. -Xmx.. -XX:MaxPermSize=.. -XX:ReservedCodeCacheSize=.."
#            where perm is a quarter of the heap clamped to [256, 4096] and
#            the code cache is half of perm.
get_mem_opts() {
  local mem=${1:-2048}
  local perm=$(( mem / 4 ))
  (( perm > 256 )) || perm=256
  (( perm < 4096 )) || perm=4096
  local codecache=$(( perm / 2 ))

  echo "-Xms${mem}m -Xmx${mem}m -XX:MaxPermSize=${perm}m -XX:ReservedCodeCacheSize=${codecache}m"
}
# Verifies that an option was given a value. Terminates the shell with status
# 1 and a message on stderr when the value is empty or starts with '-'.
#
# Arguments: $1 - description of the expected value (used in the message)
#            $2 - the option name
#            $3 - the value that followed the option
require_arg() {
  local type="$1"
  local opt="$2"
  local arg="$3"

  if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then
    echo "$opt requires <$type> argument" 1>&2
    exit 1
  fi
}
# Succeeds when a shell function named $1 is defined; prints nothing.
is_function_defined() {
  declare -f "$1" > /dev/null
}
# Processes the launcher's common command-line options. Recognised options
# set flags or add java arguments; everything else is collected in
# residual_args. When the including script defines process_my_args, the
# residual arguments are then handed to it for a second pass.
process_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|-help) usage; exit 1 ;;
      -v|-verbose) verbose=1 && shift ;;
      -d|-debug) debug=1 && shift ;;

      -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
      -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
      -jvm-debug) require_arg port "$1" "$2" && addDebugger "$2" && shift 2 ;;
      # detach stdin so sbt cannot prompt
      -batch) exec </dev/null && shift ;;

      -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
      -sbt-version) require_arg version "$1" "$2" && sbt_version="$2" && shift 2 ;;
      -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME="$2" && shift 2 ;;

      -D*) addJava "$1" && shift ;;
      # strip the leading -J and pass the rest to the JVM
      -J*) addJava "${1:2}" && shift ;;
      -P*) enableProfile "$1" && shift ;;
      *) addResidual "$1" && shift ;;
    esac
  done

  # Second pass: let the including script handle what was left over. The
  # residuals are copied first because process_my_args refills residual_args.
  if is_function_defined process_my_args; then
    myargs=("${residual_args[@]}")
    residual_args=()
    process_my_args "${myargs[@]}"
  fi
}
# Entry point of the launcher: makes sure the sbt launcher jar is available,
# processes the command line, and replaces the shell with the java process
# running sbt.
run() {
  # no jar? download it.
  if [[ ! -f "$sbt_jar" ]] && ! acquire_sbt_jar "$sbt_version"; then
    # still no jar? uh-oh.
    echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar" >&2
    exit 1
  fi

  # process the combined args, then reset "$@" to the residuals
  process_args "$@"
  set -- "${residual_args[@]}"
  argumentCount=$#

  # run sbt
  # The option strings below are left unquoted on purpose: SBT_OPTS,
  # java_opts, the output of get_mem_opts and individual java_args entries
  # (e.g. the -no-share bundle) each hold several space-separated options
  # that must reach java as separate words.
  # shellcheck disable=SC2046,SC2068,SC2086
  execRunner "$java_cmd" \
    ${SBT_OPTS:-$default_sbt_opts} \
    $(get_mem_opts "$sbt_mem") \
    ${java_opts} \
    ${java_args[@]} \
    -jar "$sbt_jar" \
    "${sbt_commands[@]}" \
    "${residual_args[@]}"
}