#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Shell script for starting the Spark Shell REPL
# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
# if those two env vars are set in spark-env.sh but MASTER is not.
#
# Arguments that are not recognised as spark-shell options are forwarded,
# in their original order, to org.apache.spark.repl.Main.

cygwin=false
case "$(uname)" in
  CYGWIN*) cygwin=true ;;
esac

# Enter posix mode for bash
set -o posix

## Global script variables
# Quoted throughout so that an installation path containing spaces works.
FWDIR="$(cd "$(dirname "$0")/.." && pwd)"

SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
DEFAULT_MASTER="local[*]"
MASTER=${MASTER:-""}

# Set to 1 by --log-conf; enables the INFO messages printed by log_info.
info_log=0

# CLI Color Templates
# tput complains on stderr when no usable terminal is available (e.g. TERM is
# unset); in that case the variables are simply left empty.
txtund=$(tput sgr 0 1 2>/dev/null)            # Underline
txtbld=$(tput bold 2>/dev/null)               # Bold
bldred=${txtbld}$(tput setaf 1 2>/dev/null)   # red
bldyel=${txtbld}$(tput setaf 3 2>/dev/null)   # yellow
bldblu=${txtbld}$(tput setaf 4 2>/dev/null)   # blue
bldwht=${txtbld}$(tput setaf 7 2>/dev/null)   # white
txtrst=$(tput sgr0 2>/dev/null)               # Reset
info="${bldwht}*${txtrst}"                    # Feedback
pass="${bldblu}*${txtrst}"
warn="${bldred}*${txtrst}"
ques="${bldblu}?${txtrst}"

# Helper function to describe the script usage.
# Outputs: the usage text on stdout.
usage() {
  cat << EOF
${txtbld}Usage${txtrst}: spark-shell [OPTIONS]

${txtbld}OPTIONS${txtrst}:
    -h  --help              : Print this help information.
    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
    -em --executor-memory   : The memory used by each executor of the Spark Shell, the number
                              is followed by m for megabytes or g for gigabytes, e.g. "1g".
    -dm --driver-memory     : The memory used by the Spark Shell, the number is followed
                              by m for megabytes or g for gigabytes, e.g. "1g".
    -m  --master            : A full string that describes the Spark Master, defaults to "local[*]"
                              e.g. "spark://localhost:7077".
    --log-conf              : Enables logging of the supplied SparkConf as INFO at start of the
                              Spark Context.

e.g.
    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g

EOF
}

# Print an error message followed by the usage text, then exit with status 1.
# Arguments: $1 - the error description
out_error() {
  echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
  usage
  exit 1
}

# Print an INFO message, but only when --log-conf has set info_log to 1.
# Arguments: $1 - the message
# Returns:   always 0 (an `[ ... ] && echo` form would return 1 when disabled).
log_info() {
  if [[ "$info_log" -eq 1 ]]; then
    echo -e "${bldyel}INFO${txtrst}: $1"
  fi
}

# Print a WARN message.
# Arguments: $1 - the message
log_warn() {
  echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
}

# PATTERNS used to validate more than one optional arg.
ARG_FLAG_PATTERN="^-"
# A bracket expression lists single characters; a '|' inside it would be
# matched literally, so it must not be used as a separator here.
MEM_PATTERN="^[0-9]+[mgMG]$"
NUM_PATTERN="^[0-9]+$"
PORT_PATTERN="^[0-9]+$"

# Setters for optional args.
# Each setter takes: $1 - the value supplied by the user
#                    $2 - the flag label used in the error message
# and exits through out_error when the value is rejected.

# Append -Dspark.cores.max=<n> to SPARK_REPL_OPTS; $1 must be all digits.
set_cores() {
  if [[ "$1" =~ $NUM_PATTERN ]]; then
    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
  else
    out_error "wrong format for $2"
  fi
}

# Append -Dspark.executor.memory=<size> to SPARK_REPL_OPTS; $1 must match
# MEM_PATTERN (digits followed by m, g, M or G).
set_em() {
  if [[ "$1" =~ $MEM_PATTERN ]]; then
    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
  else
    out_error "wrong format for $2"
  fi
}

# Export SPARK_DRIVER_MEMORY=<size>; $1 must match MEM_PATTERN.
set_dm() {
  if [[ "$1" =~ $MEM_PATTERN ]]; then
    export SPARK_DRIVER_MEMORY="$1"
  else
    out_error "wrong format for $2"
  fi
}

# Append -Dspark.logConf=<value> to SPARK_REPL_OPTS.
# Arguments: $1 - the value to set
set_spark_log_conf() {
  SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
}

# Export MASTER=<value>; a value starting with '-' is rejected because it is
# taken to be another flag rather than a master string.
set_spark_master() {
  if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
    export MASTER="$1"
  else
    out_error "wrong format for $2"
  fi
}

# Decide the final value of MASTER.
# If MASTER is empty, load-spark-env.sh is sourced and, when SPARK_MASTER_IP is
# set, MASTER becomes spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT (the port
# defaulting to 7077). If MASTER is still empty it falls back to DEFAULT_MASTER.
resolve_spark_master() {
  # Set MASTER from spark-env if possible
  local DEFAULT_SPARK_MASTER_PORT=7077
  if [[ -z "$MASTER" ]]; then
    . "$FWDIR/bin/load-spark-env.sh"
    if [[ -n "$SPARK_MASTER_IP" ]]; then
      SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
      export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
    fi
  fi

  if [[ -z "$MASTER" ]]; then
    export MASTER="$DEFAULT_MASTER"
  fi
}

# Resolve the master, export SPARK_REPL_OPTS and launch the REPL.
# Arguments: "$@" - extra arguments forwarded to org.apache.spark.repl.Main
# Returns:   the exit status of spark-class
main() {
  local status

  log_info "Base Directory set to $FWDIR"

  resolve_spark_master
  log_info "Spark Master is $MASTER"

  log_info "Spark REPL options $SPARK_REPL_OPTS"
  if $cygwin; then
    # Workaround for issue involving JLine and Cygwin
    # (see http://sourceforge.net/p/jline/bugs/40/).
    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
    # (see https://github.com/sbt/sbt/issues/562).
    stty -icanon min 1 -echo > /dev/null 2>&1
    export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
    "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
    # Remember the REPL's status: the stty call below would otherwise
    # become the function's return value.
    status=$?
    stty icanon echo > /dev/null 2>&1
    return "$status"
  else
    export SPARK_REPL_OPTS
    "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
  fi
}

# Abort through out_error when a flag that needs a value is the last word.
# Arguments: $1 - the flag label used in the error message
#            $2 - the number of words left, including the flag itself
require_value() {
  if [[ "$2" -lt 2 ]]; then
    out_error "missing value for $1"
  fi
}

# Words that are not spark-shell options; forwarded to the REPL by main.
passthrough_args=()

# Consume the command line strictly from the front. Iterating with
# `for option in "$@"` while also calling `shift` lets the loop list and the
# positional parameters drift apart as soon as an unrecognised word appears.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h | --help)
      usage
      exit 1
      ;;
    -c | --cores)
      require_value "-c/--cores" "$#"
      set_cores "$2" "-c/--cores"
      shift 2
      ;;
    -em | --executor-memory)
      require_value "-em/--executor-memory" "$#"
      set_em "$2" "-em/--executor-memory"
      shift 2
      ;;
    -dm | --driver-memory)
      require_value "-dm/--driver-memory" "$#"
      set_dm "$2" "-dm/--driver-memory"
      shift 2
      ;;
    -m | --master)
      require_value "-m/--master" "$#"
      set_spark_master "$2" "-m/--master"
      shift 2
      ;;
    --log-conf)
      set_spark_log_conf "true"
      info_log=1
      shift
      ;;
    *)
      passthrough_args+=("$1")
      shift
      ;;
  esac
done

# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
# binary distribution of Spark where Scala is not installed
exit_status=127
saved_stty=""

# restore stty settings (echo in particular)
restoreSttySettings() {
  stty "$saved_stty"
  saved_stty=""
}

# Restore the terminal if settings were saved, then exit with exit_status.
onExit() {
  if [[ "$saved_stty" != "" ]]; then
    restoreSttySettings
  fi
  exit "$exit_status"
}

# to reenable echo if we are interrupted before completing.
trap onExit INT

# save terminal settings; clear on error so we don't later try to restore them.
# (`[[ ! $? ]]` cannot be used for this: it tests whether the string "$?" is
# empty, which is never the case.)
if ! saved_stty=$(stty -g 2>/dev/null); then
  saved_stty=""
fi

main "${passthrough_args[@]}"

# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
exit_status=$?
onExit