path: root/bin/spark-shell
author    Bernardo Gomez Palacio <bernardo.gomezpalacio@gmail.com>  2014-03-29 19:49:22 -0700
committer Aaron Davidson <aaron@databricks.com>  2014-03-29 19:49:22 -0700
commit    fda86d8b46a1cc484d11ac5446d8cc2a86429b9b (patch)
tree      3f9d12736e49f1bc45a18b4a4eaf40651a7875fd /bin/spark-shell
parent    af3746ce0d724dc624658a2187bde188ab26d084 (diff)
[SPARK-1186] : Enrich the Spark Shell to support additional arguments.
Enrich the Spark Shell functionality to support the following options.

```
Usage: spark-shell [OPTIONS]

OPTIONS:
    -h  --help             : Print this help information.
    -c  --cores            : The maximum number of cores to be used by the Spark Shell.
    -em --executor-memory  : The memory used by each executor of the Spark Shell, the number
                             is followed by m for megabytes or g for gigabytes, e.g. "1g".
    -dm --driver-memory    : The memory used by the Spark Shell, the number is followed
                             by m for megabytes or g for gigabytes, e.g. "1g".
    -m  --master           : A full string that describes the Spark Master, defaults to "local"
                             e.g. "spark://localhost:7077".
    --log-conf             : Enables logging of the supplied SparkConf as INFO at start of the
                             Spark Context.

e.g.
    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
```

**Note**: this commit reflects the changes applied to _master_ based on [5d98cfc1].

[ticket: SPARK-1186] : Enrich the Spark Shell to support additional arguments.
https://spark-project.atlassian.net/browse/SPARK-1186

Author : bernardo.gomezpalcio@gmail.com

Author: Bernardo Gomez Palacio <bernardo.gomezpalacio@gmail.com>

Closes #116 from berngp/feature/enrich-spark-shell and squashes the following commits:

c5f455f [Bernardo Gomez Palacio] [SPARK-1186] : Enrich the Spark Shell to support additional arguments.
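For readers skimming the patch, here is a rough sketch of how the new flags end up as driver-side settings. The values are illustrative only; the mapping simply mirrors the setter functions (set_cores, set_em, set_dm, set_spark_master, set_spark_log_conf) added in the diff below.

```
# Illustrative invocation (example values, not defaults):
spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g --log-conf

# Roughly equivalent to launching the REPL with:
#   MASTER=spark://localhost:7077                 (set_spark_master)
#   SPARK_DRIVER_MEMORY=512m                      (exported by set_dm)
#   SPARK_REPL_OPTS="-Dspark.cores.max=4 \
#                    -Dspark.executor.memory=2g \
#                    -Dspark.logConf=true"        (set_cores / set_em / set_spark_log_conf)
```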
Diffstat (limited to 'bin/spark-shell')
-rwxr-xr-x  bin/spark-shell  226
1 file changed, 168 insertions, 58 deletions
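The way the shell now chooses a master can be summarised as a short hand-written sketch of the precedence implemented by resolve_spark_master in the diff below; the host names are hypothetical.

```
# Precedence sketch (hypothetical hosts; see resolve_spark_master below):
#   1. -m/--master, or an already-exported MASTER variable, is used as-is;
#   2. otherwise bin/load-spark-env.sh is sourced and, if SPARK_MASTER_IP is set,
#      MASTER becomes spark://$SPARK_MASTER_IP:${SPARK_MASTER_PORT:-7077};
#   3. otherwise MASTER falls back to "local".
spark-shell -m spark://host-a:7077      # case 1: explicit flag
MASTER=spark://host-a:7077 spark-shell  # case 1: environment variable
spark-shell                             # case 2 or 3, depending on spark-env.sh
```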
diff --git a/bin/spark-shell b/bin/spark-shell
index 861ab60654..fac006cf49 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -30,67 +30,189 @@ esac
# Enter posix mode for bash
set -o posix
-CORE_PATTERN="^[0-9]+$"
-MEM_PATTERN="^[0-9]+[m|g|M|G]$"
-
+## Global script variables
FWDIR="$(cd `dirname $0`/..; pwd)"
-if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
- echo "Usage: spark-shell [OPTIONS]"
- echo "OPTIONS:"
- echo "-c --cores num, the maximum number of cores to be used by the spark shell"
- echo "-em --execmem num[m|g], the memory used by each executor of spark shell"
- echo "-dm --drivermem num[m|g], the memory used by the spark shell and driver"
- echo "-h --help, print this help information"
- exit
-fi
+SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
+DEFAULT_MASTER="local"
+MASTER=${MASTER:-""}
+
+info_log=0
+
+#CLI Color Templates
+txtund=$(tput sgr 0 1) # Underline
+txtbld=$(tput bold) # Bold
+bldred=${txtbld}$(tput setaf 1) # red
+bldyel=${txtbld}$(tput setaf 3) # yellow
+bldblu=${txtbld}$(tput setaf 4) # blue
+bldwht=${txtbld}$(tput setaf 7) # white
+txtrst=$(tput sgr0) # Reset
+info=${bldwht}*${txtrst} # Feedback
+pass=${bldblu}*${txtrst}
+warn=${bldred}*${txtrst}
+ques=${bldblu}?${txtrst}
+
+# Helper function to describe the script usage
+function usage() {
+ cat << EOF
+${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
+
+${txtbld}OPTIONS${txtrst}:
+ -h --help : Print this help information.
+ -c --cores : The maximum number of cores to be used by the Spark Shell.
+ -em --executor-memory : The memory used by each executor of the Spark Shell, the number
+ is followed by m for megabytes or g for gigabytes, e.g. "1g".
+ -dm --driver-memory : The memory used by the Spark Shell, the number is followed
+ by m for megabytes or g for gigabytes, e.g. "1g".
+ -m --master : A full string that describes the Spark Master, defaults to "local"
+ e.g. "spark://localhost:7077".
+ --log-conf : Enables logging of the supplied SparkConf as INFO at start of the
+ Spark Context.
+
+e.g.
+ spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
+
+EOF
+}
+
+function out_error(){
+ echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
+ usage
+ exit 1
+}
+
+function log_info(){
+ [ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
+}
+
+function log_warn(){
+ echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
+}
-for o in "$@"; do
- if [ "$1" = "-c" -o "$1" = "--cores" ]; then
- shift
+# PATTERNS used to validate more than one optional arg.
+ARG_FLAG_PATTERN="^-"
+MEM_PATTERN="^[0-9]+[m|g|M|G]$"
+NUM_PATTERN="^[0-9]+$"
+PORT_PATTERN="^[0-9]+$"
+
+# Setters for optional args.
+function set_cores(){
+ CORE_PATTERN="^[0-9]+$"
if [[ "$1" =~ $CORE_PATTERN ]]; then
- SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
- shift
+ SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
else
- echo "ERROR: wrong format for -c/--cores"
- exit 1
+ out_error "wrong format for $2"
fi
- fi
- if [ "$1" = "-em" -o "$1" = "--execmem" ]; then
- shift
+}
+
+function set_em(){
if [[ $1 =~ $MEM_PATTERN ]]; then
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
- shift
else
- echo "ERROR: wrong format for --execmem/-em"
- exit 1
+ out_error "wrong format for $2"
fi
- fi
- if [ "$1" = "-dm" -o "$1" = "--drivermem" ]; then
- shift
+}
+
+function set_dm(){
if [[ $1 =~ $MEM_PATTERN ]]; then
export SPARK_DRIVER_MEMORY=$1
- shift
else
- echo "ERROR: wrong format for --drivermem/-dm"
- exit 1
+ out_error "wrong format for $2"
fi
- fi
-done
+}
+
+function set_spark_log_conf(){
+ SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
+}
-# Set MASTER from spark-env if possible
-DEFAULT_SPARK_MASTER_PORT=7077
-if [ -z "$MASTER" ]; then
- . $FWDIR/bin/load-spark-env.sh
- if [ "x" != "x$SPARK_MASTER_IP" ]; then
- if [ "y" != "y$SPARK_MASTER_PORT" ]; then
- SPARK_MASTER_PORT="${SPARK_MASTER_PORT}"
+function set_spark_master(){
+ if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
+ MASTER="$1"
else
- SPARK_MASTER_PORT=$DEFAULT_SPARK_MASTER_PORT
+ out_error "wrong format for $2"
+ fi
+}
+
+function resolve_spark_master(){
+ # Set MASTER from spark-env if possible
+ DEFAULT_SPARK_MASTER_PORT=7077
+ if [ -z "$MASTER" ]; then
+ . $FWDIR/bin/load-spark-env.sh
+ if [ -n "$SPARK_MASTER_IP" ]; then
+ SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
+ export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
+ fi
+ fi
+
+ if [ -z "$MASTER" ]; then
+ MASTER="$DEFAULT_MASTER"
fi
- export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
- fi
-fi
+
+}
+
+function main(){
+ log_info "Base Directory set to $FWDIR"
+
+ resolve_spark_master
+ log_info "Spark Master is $MASTER"
+
+ log_info "Spark REPL options $SPARK_REPL_OPTS"
+ if $cygwin; then
+ # Workaround for issue involving JLine and Cygwin
+ # (see http://sourceforge.net/p/jline/bugs/40/).
+ # If you're using the Mintty terminal emulator in Cygwin, may need to set the
+ # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+ # (see https://github.com/sbt/sbt/issues/562).
+ stty -icanon min 1 -echo > /dev/null 2>&1
+ export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
+ $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+ stty icanon echo > /dev/null 2>&1
+ else
+ export SPARK_REPL_OPTS
+ $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+ fi
+}
+
+for option in "$@"
+do
+ case $option in
+ -h | --help )
+ usage
+ exit 1
+ ;;
+ -c | --cores)
+ shift
+ _1=$1
+ shift
+ set_cores $_1 "-c/--cores"
+ ;;
+ -em | --executor-memory)
+ shift
+ _1=$1
+ shift
+ set_em $_1 "-em/--executor-memory"
+ ;;
+ -dm | --driver-memory)
+ shift
+ _1=$1
+ shift
+ set_dm $_1 "-dm/--driver-memory"
+ ;;
+ -m | --master)
+ shift
+ _1=$1
+ shift
+ set_spark_master $_1 "-m/--master"
+ ;;
+ --log-conf)
+ shift
+ set_spark_log_conf "true"
+ info_log=1
+ ;;
+ ?)
+ ;;
+ esac
+done
# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
# binary distribution of Spark where Scala is not installed
@@ -120,22 +242,10 @@ if [[ ! $? ]]; then
saved_stty=""
fi
-if $cygwin; then
- # Workaround for issue involving JLine and Cygwin
- # (see http://sourceforge.net/p/jline/bugs/40/).
- # If you're using the Mintty terminal emulator in Cygwin, may need to set the
- # "Backspace sends ^H" setting in "Keys" section of the Mintty options
- # (see https://github.com/sbt/sbt/issues/562).
- stty -icanon min 1 -echo > /dev/null 2>&1
- export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
- $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
- stty icanon echo > /dev/null 2>&1
-else
- export SPARK_REPL_OPTS
- $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
-fi
+main
# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
exit_status=$?
onExit
+