aboutsummaryrefslogblamecommitdiff
path: root/bin/spark-shell
blob: ea12d256b23a18dcc820538feba432c8d71acf1b (plain) (tree)
1
                   

















                                                                          

                                                

                                                                                 




                          
 


                           
                          
                                  
 
                                        
                         




























                                                                                              
                                                                                                   






















                                                                                               
 








                                                       
                                        
                                                               
        
                                       
      


                  
                                     
                                                                   
        
                                     
      


                  
                                     
                                   
        
                                     
      




                                                         
 

                                              
                          
        















                                                                                  
                                       
      

































































                                                                                     
 



























                                                                                            
    




                                                
 
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
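# Shell script for starting the Spark Shell REPL.
# Note that if MASTER is not set, it is set to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
# when SPARK_MASTER_IP is set (e.g. in spark-env.sh); SPARK_MASTER_PORT defaults to 7077.
# If SPARK_MASTER_IP is not set either, MASTER falls back to "local[*]".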

# Record whether we are running under Cygwin; the flag is "true" only when
# the kernel name reported by uname starts with CYGWIN.
case "$(uname)" in
    CYGWIN*)
        cygwin=true
        ;;
    *)
        cygwin=false
        ;;
esac

# Enter posix mode for bash
set -o posix

## Global script variables
# Absolute path of the directory one level above this script's directory.
# Every expansion is quoted so that a path containing spaces or glob
# characters still resolves correctly.
FWDIR="$(cd "$(dirname -- "$0")/.." && pwd)"

# Extra JVM options for the REPL; taken from the environment when already set.
SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-}"
# Master URL used when no other master can be determined.
DEFAULT_MASTER="local[*]"
# Master URL; taken from the environment when already set, otherwise empty.
MASTER="${MASTER:-}"

# When 1, log_info prints its messages; the --log-conf option switches it on.
info_log=0

# CLI color templates.
# stderr of every tput call is silenced: when TERM is unset or unknown, tput
# fails and would otherwise print one diagnostic per call. On failure the
# variable is simply left empty, so messages are printed without styling.
txtund=$(tput sgr 0 1 2>/dev/null)          # Underline
txtbld=$(tput bold 2>/dev/null)             # Bold
bldred=${txtbld}$(tput setaf 1 2>/dev/null) # red
bldyel=${txtbld}$(tput setaf 3 2>/dev/null) # yellow
bldblu=${txtbld}$(tput setaf 4 2>/dev/null) # blue
bldwht=${txtbld}$(tput setaf 7 2>/dev/null) # white
txtrst=$(tput sgr0 2>/dev/null)             # Reset
info="${bldwht}*${txtrst}"                  # Feedback
pass="${bldblu}*${txtrst}"
warn="${bldred}*${txtrst}"
ques="${bldblu}?${txtrst}"

# Print the spark-shell command-line help to stdout.
# Headings are styled with the txtbld / txtrst globals.
usage() {
    cat <<EOF
${txtbld}Usage${txtrst}: spark-shell [OPTIONS]

${txtbld}OPTIONS${txtrst}:
    -h  --help              : Print this help information.
    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
    -em --executor-memory   : The memory used by each executor of the Spark Shell, the number 
                              is followed by m for megabytes or g for gigabytes, e.g. "1g".
    -dm --driver-memory     : The memory used by the Spark Shell, the number is followed 
                              by m for megabytes or g for gigabytes, e.g. "1g".
    -m  --master            : A full string that describes the Spark Master, defaults to "local[*]"
                              e.g. "spark://localhost:7077".
    --log-conf              : Enables logging of the supplied SparkConf as INFO at start of the
                              Spark Context.

e.g.
    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g

EOF
}

# Report a fatal command-line error and stop the script.
#   $1 - description of the problem (backslash escapes are interpreted)
# Prints the ERROR line, then the usage text, and exits with status 1.
out_error() {
    local message="$1"
    echo -e "${txtund}${bldred}ERROR${txtrst}: ${message}"
    usage
    exit 1
}

# Print an INFO message when informational logging is enabled.
#   $1 - message text (backslash escapes are interpreted)
# Logging is enabled when the global info_log equals 1; an unset or empty
# info_log counts as disabled. The function returns 0 when logging is
# disabled, so a silent call never looks like a failure to its caller.
log_info() {
    if [[ "${info_log:-0}" -eq 1 ]]; then
        echo -e "${bldyel}INFO${txtrst}: $1"
    fi
}

# Print a WARN message to stdout unconditionally.
#   $1 - message text (backslash escapes are interpreted)
log_warn() {
    local message="$1"
    echo -e "${txtund}${bldyel}WARN${txtrst}: ${message}"
}

# Regular expressions (for [[ =~ ]]) shared by the option validators.
# Matches a value that looks like another option flag (leading dash).
ARG_FLAG_PATTERN="^-"
# A run of digits followed by exactly one unit letter: m/M or g/G.
# Inside a bracket expression '|' is a literal character, not alternation,
# so it must not be listed or values such as "512|" would be accepted.
MEM_PATTERN="^[0-9]+[mgMG]$"
# A run of one or more digits.
NUM_PATTERN="^[0-9]+$"
PORT_PATTERN="^[0-9]+$"

# Setters for optional args.

# Validate a core count and add it to the REPL options.
#   $1 - value supplied by the user; must consist of digits only
#   $2 - option name, used in the error message
# On success appends " -Dspark.cores.max=<value>" to SPARK_REPL_OPTS;
# otherwise reports the problem through out_error.
set_cores() {
    # Kept local so the helper pattern does not leak into the global scope.
    local core_pattern='^[0-9]+$'
    if [[ "$1" =~ $core_pattern ]]; then
        SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
    else
        out_error "wrong format for $2"
    fi
}

# Validate an executor-memory value and add it to the REPL options.
#   $1 - memory size, checked against MEM_PATTERN
#   $2 - option name, used in the error message
# A valid value is appended to SPARK_REPL_OPTS as -Dspark.executor.memory;
# anything else is reported through out_error.
set_em() {
    local amount="$1" flag="$2"
    if ! [[ $amount =~ $MEM_PATTERN ]]; then
        out_error "wrong format for ${flag}"
    else
        SPARK_REPL_OPTS="${SPARK_REPL_OPTS} -Dspark.executor.memory=${amount}"
    fi
}

# Validate a driver-memory value and export it for child processes.
#   $1 - memory size, checked against MEM_PATTERN
#   $2 - option name, used in the error message
# A valid value is exported as SPARK_DRIVER_MEMORY; anything else is
# reported through out_error.
set_dm() {
    local amount="$1" flag="$2"
    if ! [[ $amount =~ $MEM_PATTERN ]]; then
        out_error "wrong format for ${flag}"
    else
        SPARK_DRIVER_MEMORY="${amount}"
        export SPARK_DRIVER_MEMORY
    fi
}

# Add the spark.logConf system property to the REPL options.
#   $1 - value for the property; it is appended without validation
set_spark_log_conf() {
    local enabled="$1"
    SPARK_REPL_OPTS="${SPARK_REPL_OPTS} -Dspark.logConf=${enabled}"
}

# Export the Spark master URL chosen on the command line.
#   $1 - master string; rejected when it matches ARG_FLAG_PATTERN, i.e. when
#        it looks like another option flag rather than a value
#   $2 - option name, used in the error message
set_spark_master() {
    local url="$1" flag="$2"
    if [[ "$url" =~ $ARG_FLAG_PATTERN ]]; then
        out_error "wrong format for ${flag}"
    else
        MASTER="$url"
        export MASTER
    fi
}

# Make sure MASTER is set and exported before the REPL starts.
# Resolution order:
#   1. a non-empty MASTER is left untouched;
#   2. otherwise $FWDIR/bin/load-spark-env.sh is sourced and, if SPARK_MASTER_IP
#      is then non-empty, MASTER becomes spark://<ip>:<port>, where the port is
#      SPARK_MASTER_PORT or 7077 when that is unset or empty;
#   3. otherwise MASTER becomes DEFAULT_MASTER.
resolve_spark_master() {
    # Local, so the helper value does not leak into the global scope; it is
    # still visible to the script sourced below.
    local DEFAULT_SPARK_MASTER_PORT=7077
    if [[ -z "$MASTER" ]]; then
        # Quoted so that an install path containing spaces is sourced correctly.
        . "$FWDIR/bin/load-spark-env.sh"
        if [[ -n "${SPARK_MASTER_IP:-}" ]]; then
            SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-$DEFAULT_SPARK_MASTER_PORT}"
            export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
        fi
    fi

    if [[ -z "$MASTER" ]]; then
        export MASTER="$DEFAULT_MASTER"
    fi
}

# Resolve the master, log the effective settings and run the Spark REPL.
#   $@ - arguments passed on to org.apache.spark.repl.Main
# Exports SPARK_REPL_OPTS for the child process.
# Returns the exit status of the spark-class invocation on every platform.
main() {
    log_info "Base Directory set to $FWDIR"

    resolve_spark_master
    log_info "Spark Master is $MASTER"

    log_info "Spark REPL options  $SPARK_REPL_OPTS"

    local repl_status
    if [[ "$cygwin" == "true" ]]; then
        # Workaround for issue involving JLine and Cygwin
        # (see http://sourceforge.net/p/jline/bugs/40/).
        # If you're using the Mintty terminal emulator in Cygwin, may need to set the
        # "Backspace sends ^H" setting in "Keys" section of the Mintty options
        # (see https://github.com/sbt/sbt/issues/562).
        stty -icanon min 1 -echo > /dev/null 2>&1
        export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
        "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
        # Capture the status now; the stty call below would otherwise replace it.
        repl_status=$?
        stty icanon echo > /dev/null 2>&1
    else
        export SPARK_REPL_OPTS
        "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
        repl_status=$?
    fi
    return "$repl_status"
}

# Parse the command-line options and dispatch each recognised flag to its setter.
#   $@ - the script's command-line arguments
# Arguments are consumed strictly left to right: a flag that takes a value
# always reads the argument that directly follows it, so an unrecognised
# argument in front of a flag cannot shift the pairing. Unrecognised
# arguments are skipped silently. A value-taking flag given as the last
# argument is reported through out_error.
parse_options() {
    local flag
    while [[ $# -gt 0 ]]; do
        flag="$1"
        shift
        case "$flag" in
            -h | --help)
                usage
                exit 1
                ;;
            -c | --cores)
                [[ $# -gt 0 ]] || out_error "missing value for -c/--cores"
                set_cores "$1" "-c/--cores"
                shift
                ;;
            -em | --executor-memory)
                [[ $# -gt 0 ]] || out_error "missing value for -em/--executor-memory"
                set_em "$1" "-em/--executor-memory"
                shift
                ;;
            -dm | --driver-memory)
                [[ $# -gt 0 ]] || out_error "missing value for -dm/--driver-memory"
                set_dm "$1" "-dm/--driver-memory"
                shift
                ;;
            -m | --master)
                [[ $# -gt 0 ]] || out_error "missing value for -m/--master"
                set_spark_master "$1" "-m/--master"
                shift
                ;;
            --log-conf)
                set_spark_log_conf "true"
                info_log=1
                ;;
            *)
                # Not an option this script knows about: ignore it.
                ;;
        esac
    done
}

parse_options "$@"

# The restore-TTY-on-exit functions below are copied from the Scala script so that
# spark-shell exits properly even in a binary distribution of Spark where Scala is
# not installed.
saved_stty=""    # saved terminal settings; empty means there is nothing to restore
exit_status=127  # status onExit exits with

# Restore the terminal settings held in saved_stty (echo in particular), then
# clear saved_stty so they are not applied a second time.
# The value is passed to stty as one quoted argument, so it cannot be
# word-split (for example when IFS contains ':') or glob-expanded.
restoreSttySettings() {
  stty "$saved_stty"
  saved_stty=""
}

# Leave the script: restore the terminal first when settings were saved,
# then exit with the status held in exit_status.
onExit() {
  if [[ -n "$saved_stty" ]]; then
    restoreSttySettings
  fi
  exit $exit_status
}

# to reenable echo if we are interrupted before completing.
trap onExit INT

# Save the terminal settings. If stty fails, clear the variable so that we
# don't later try to restore them. The status of the command substitution is
# tested directly: a check such as [[ ! $? ]] only asks whether the text of
# $? is empty, which is never the case.
if ! saved_stty=$(stty -g 2>/dev/null); then
  saved_stty=""
fi

main

# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
exit_status=$?
onExit