author     Patrick Wendell <pwendell@gmail.com>    2014-04-24 23:59:16 -0700
committer  Patrick Wendell <pwendell@gmail.com>    2014-04-24 23:59:16 -0700
commit     dc3b640a0ab3501b678b591be3e99fbcf3badbec (patch)
tree       2865c2a3cef66f061d846f6a968725e83728271b /bin
parent     6e101f1183f92769779bc8ac14813c063bf1ff3f (diff)
SPARK-1619 Launch spark-shell with spark-submit
This simplifies the shell a bunch and passes all arguments through to spark-submit. There is a tiny incompatibility from 0.9.1: you can no longer pass `-c` for the core count, only `--cores`. However, spark-submit will give a good error message in this case, I don't think many people used this flag, and it's a trivial change for users.

Author: Patrick Wendell <pwendell@gmail.com>

Closes #542 from pwendell/spark-shell and squashes the following commits:

9eb3e6f [Patrick Wendell] Updating Spark docs
b552459 [Patrick Wendell] Andrew's feedback
97720fa [Patrick Wendell] Review feedback
aa2900b [Patrick Wendell] SPARK-1619 Launch spark-shell with spark-submit
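For example, an old 0.9.1 invocation using the shell-specific short flags, and a rough equivalent now that everything is forwarded to spark-submit (the master URL and resource sizes below are purely illustrative):

    # 0.9.1 style, using the shell's own option parser (removed by this patch):
    ./bin/spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g

    # After this change, options are passed straight through to spark-submit:
    ./bin/spark-shell --master spark://localhost:7077 --cores 4 --driver-memory 512m --executor-memory 2g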
Diffstat (limited to 'bin')
-rwxr-xr-x  bin/spark-shell   177
-rwxr-xr-x  bin/spark-submit   10
2 files changed, 14 insertions(+), 173 deletions(-)
diff --git a/bin/spark-shell b/bin/spark-shell
index ea12d256b2..f1f3c18877 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -19,9 +19,8 @@
#
# Shell script for starting the Spark Shell REPL
-# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
-# if those two env vars are set in spark-env.sh but MASTER is not.
+args="$@"
cygwin=false
case "`uname`" in
CYGWIN*) cygwin=true;;
@@ -30,133 +29,16 @@ esac
# Enter posix mode for bash
set -o posix
+if [[ "$@" == *--help* ]]; then
+ echo "Usage: ./bin/spark-shell [options]"
+ ./bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+ exit 0
+fi
+
## Global script variables
FWDIR="$(cd `dirname $0`/..; pwd)"
-SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
-DEFAULT_MASTER="local[*]"
-MASTER=${MASTER:-""}
-
-info_log=0
-
-#CLI Color Templates
-txtund=$(tput sgr 0 1) # Underline
-txtbld=$(tput bold) # Bold
-bldred=${txtbld}$(tput setaf 1) # red
-bldyel=${txtbld}$(tput setaf 3) # yellow
-bldblu=${txtbld}$(tput setaf 4) # blue
-bldwht=${txtbld}$(tput setaf 7) # white
-txtrst=$(tput sgr0) # Reset
-info=${bldwht}*${txtrst} # Feedback
-pass=${bldblu}*${txtrst}
-warn=${bldred}*${txtrst}
-ques=${bldblu}?${txtrst}
-
-# Helper function to describe the script usage
-function usage() {
- cat << EOF
-${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
-
-${txtbld}OPTIONS${txtrst}:
- -h --help : Print this help information.
- -c --cores : The maximum number of cores to be used by the Spark Shell.
- -em --executor-memory : The memory used by each executor of the Spark Shell, the number
- is followed by m for megabytes or g for gigabytes, e.g. "1g".
- -dm --driver-memory : The memory used by the Spark Shell, the number is followed
- by m for megabytes or g for gigabytes, e.g. "1g".
- -m --master : A full string that describes the Spark Master, defaults to "local[*]"
- e.g. "spark://localhost:7077".
- --log-conf : Enables logging of the supplied SparkConf as INFO at start of the
- Spark Context.
-
-e.g.
- spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
-
-EOF
-}
-
-function out_error(){
- echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
- usage
- exit 1
-}
-
-function log_info(){
- [ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
-}
-
-function log_warn(){
- echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
-}
-
-# PATTERNS used to validate more than one optional arg.
-ARG_FLAG_PATTERN="^-"
-MEM_PATTERN="^[0-9]+[m|g|M|G]$"
-NUM_PATTERN="^[0-9]+$"
-PORT_PATTERN="^[0-9]+$"
-
-# Setters for optional args.
-function set_cores(){
- CORE_PATTERN="^[0-9]+$"
- if [[ "$1" =~ $CORE_PATTERN ]]; then
- SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
- else
- out_error "wrong format for $2"
- fi
-}
-
-function set_em(){
- if [[ $1 =~ $MEM_PATTERN ]]; then
- SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
- else
- out_error "wrong format for $2"
- fi
-}
-
-function set_dm(){
- if [[ $1 =~ $MEM_PATTERN ]]; then
- export SPARK_DRIVER_MEMORY=$1
- else
- out_error "wrong format for $2"
- fi
-}
-
-function set_spark_log_conf(){
- SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
-}
-
-function set_spark_master(){
- if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
- export MASTER="$1"
- else
- out_error "wrong format for $2"
- fi
-}
-
-function resolve_spark_master(){
- # Set MASTER from spark-env if possible
- DEFAULT_SPARK_MASTER_PORT=7077
- if [ -z "$MASTER" ]; then
- . $FWDIR/bin/load-spark-env.sh
- if [ -n "$SPARK_MASTER_IP" ]; then
- SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
- export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
- fi
- fi
-
- if [ -z "$MASTER" ]; then
- export MASTER="$DEFAULT_MASTER"
- fi
-
-}
-
function main(){
- log_info "Base Directory set to $FWDIR"
-
- resolve_spark_master
- log_info "Spark Master is $MASTER"
-
- log_info "Spark REPL options $SPARK_REPL_OPTS"
if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
@@ -165,55 +47,14 @@ function main(){
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
- $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+ $FWDIR/bin/spark-submit spark-internal "$args" --class org.apache.spark.repl.Main
stty icanon echo > /dev/null 2>&1
else
export SPARK_REPL_OPTS
- $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+ $FWDIR/bin/spark-submit spark-internal "$args" --class org.apache.spark.repl.Main
fi
}
-for option in "$@"
-do
- case $option in
- -h | --help )
- usage
- exit 1
- ;;
- -c | --cores)
- shift
- _1=$1
- shift
- set_cores $_1 "-c/--cores"
- ;;
- -em | --executor-memory)
- shift
- _1=$1
- shift
- set_em $_1 "-em/--executor-memory"
- ;;
- -dm | --driver-memory)
- shift
- _1=$1
- shift
- set_dm $_1 "-dm/--driver-memory"
- ;;
- -m | --master)
- shift
- _1=$1
- shift
- set_spark_master $_1 "-m/--master"
- ;;
- --log-conf)
- shift
- set_spark_log_conf "true"
- info_log=1
- ;;
- ?)
- ;;
- esac
-done
-
# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
# binary distribution of Spark where Scala is not installed
exit_status=127
diff --git a/bin/spark-submit b/bin/spark-submit
index 498d0b27ba..b2a1dca721 100755
--- a/bin/spark-submit
+++ b/bin/spark-submit
@@ -21,15 +21,15 @@ export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
ORIG_ARGS=$@
while (($#)); do
- if [ $1 = "--deploy-mode" ]; then
+ if [ "$1" = "--deploy-mode" ]; then
DEPLOY_MODE=$2
- elif [ $1 = "--driver-memory" ]; then
+ elif [ "$1" = "--driver-memory" ]; then
DRIVER_MEMORY=$2
- elif [ $1 = "--driver-library-path" ]; then
+ elif [ "$1" = "--driver-library-path" ]; then
export _SPARK_LIBRARY_PATH=$2
- elif [ $1 = "--driver-class-path" ]; then
+ elif [ "$1" = "--driver-class-path" ]; then
export SPARK_CLASSPATH="$SPARK_CLASSPATH:$2"
- elif [ $1 = "--driver-java-options" ]; then
+ elif [ "$1" = "--driver-java-options" ]; then
export SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $2"
fi
shift
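The quoting added to the comparisons above matters when an option value contains whitespace: an unquoted $1 is word-split inside [ ], which breaks the test. A minimal standalone sketch (the option value is purely illustrative, not taken from the patch):

    arg="-XX:+UseG1GC -Dspark.foo=bar"        # a value with spaces, e.g. for --driver-java-options
    [ $arg = "--driver-java-options" ]        # word-splits into several words: "[: too many arguments"
    [ "$arg" = "--driver-java-options" ]      # single-word comparison; simply evaluates to false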