#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Shell script for starting the Spark Shell REPL.
# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
# if SPARK_MASTER_IP is set in spark-env.sh and MASTER is not; SPARK_MASTER_PORT
# defaults to 7077 when unset.
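#
# For illustration, with a hypothetical conf/spark-env.sh containing:
#     SPARK_MASTER_IP=master.example.com
# and MASTER unset in the environment, the shell would connect to
# spark://master.example.com:7077.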

cygwin=false
case "$(uname)" in
    CYGWIN*) cygwin=true;;
esac

# Enter posix mode for bash
set -o posix

## Global script variables
FWDIR="$(cd "$(dirname "$0")"/..; pwd)"

SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
DEFAULT_MASTER="local[*]"
MASTER=${MASTER:-""}

info_log=0

# CLI color templates. Errors from tput are silenced so the script still works
# when no terminal is attached (e.g. when output is redirected).
txtund=$(tput sgr 0 1 2>/dev/null)          # Underline
txtbld=$(tput bold 2>/dev/null)             # Bold
bldred=${txtbld}$(tput setaf 1 2>/dev/null) # Red
bldyel=${txtbld}$(tput setaf 3 2>/dev/null) # Yellow
bldblu=${txtbld}$(tput setaf 4 2>/dev/null) # Blue
bldwht=${txtbld}$(tput setaf 7 2>/dev/null) # White
txtrst=$(tput sgr0 2>/dev/null)             # Reset
info=${bldwht}*${txtrst}        # Feedback
pass=${bldblu}*${txtrst}
warn=${bldred}*${txtrst}
ques=${bldblu}?${txtrst}
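# For illustration only; these feedback markers are defined for ad-hoc messages
# and are not referenced elsewhere in this script:
#     echo -e "$info Spark assembly found"
#     echo -e "$warn Spark assembly missing"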

# Helper function to describe the script usage
function usage() {
    cat << EOF
${txtbld}Usage${txtrst}: spark-shell [OPTIONS]

${txtbld}OPTIONS${txtrst}:
    -h  --help              : Print this help information.
    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
    -em --executor-memory   : The memory used by each executor of the Spark Shell; suffix the
                              number with m for megabytes or g for gigabytes, e.g. "1g".
    -dm --driver-memory     : The memory used by the Spark Shell driver; suffix the number
                              with m for megabytes or g for gigabytes, e.g. "1g".
    -m  --master            : The master URL for the Spark cluster, e.g. "spark://localhost:7077";
                              defaults to "local[*]".
    --log-conf              : Log the supplied SparkConf at INFO level when the Spark
                              Context starts.

e.g.
    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g

EOF
}

function out_error(){
    echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
    usage
    exit 1
}

function log_info(){
    [ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
}

function log_warn(){
    echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
}

# Patterns used to validate the optional arguments.
# Note: inside a bracket expression "|" is a literal character, so the memory
# pattern must be [mgMG], not [m|g|M|G].
ARG_FLAG_PATTERN="^-"
MEM_PATTERN="^[0-9]+[mgMG]$"
NUM_PATTERN="^[0-9]+$"
PORT_PATTERN="^[0-9]+$"
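# For illustration, MEM_PATTERN accepts values such as "512m", "2g" and "4G",
# and rejects values such as "512" (no unit) or "2gb" (trailing characters).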

# Setters for optional args.
function set_cores(){
    if [[ "$1" =~ $NUM_PATTERN ]]; then
        SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
    else
        out_error "wrong format for $2"
    fi
}

function set_em(){
    if [[ "$1" =~ $MEM_PATTERN ]]; then
        SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
    else
        out_error "wrong format for $2"
    fi
}

function set_dm(){
    if [[ "$1" =~ $MEM_PATTERN ]]; then
        export SPARK_DRIVER_MEMORY="$1"
    else
        out_error "wrong format for $2"
    fi
}

function set_spark_log_conf(){
    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
}
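# For illustration, passing --log-conf results in "-Dspark.logConf=true", which
# makes the SparkContext log its effective configuration at INFO level on startup.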

function set_spark_master(){
    # Reject an empty value or one that looks like another flag.
    if [[ -n "$1" && ! "$1" =~ $ARG_FLAG_PATTERN ]]; then
        export MASTER="$1"
    else
        out_error "wrong format for $2"
    fi
}
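# For illustration: set_spark_master "spark://localhost:7077" "-m/--master"
# exports MASTER=spark://localhost:7077, while an empty value or "-c" is rejected.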

function resolve_spark_master(){
    # Set MASTER from spark-env if possible
    DEFAULT_SPARK_MASTER_PORT=7077
    if [ -z "$MASTER" ]; then
        . "$FWDIR/bin/load-spark-env.sh"
        if [ -n "$SPARK_MASTER_IP" ]; then
            SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
            export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
        fi
    fi

    if [ -z "$MASTER" ]; then
        export MASTER="$DEFAULT_MASTER"
    fi
}
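# For illustration, the master is resolved in this order of precedence:
#   1. MASTER already set in the environment (or via -m/--master): used as-is.
#   2. SPARK_MASTER_IP set in spark-env.sh: spark://$SPARK_MASTER_IP:${SPARK_MASTER_PORT:-7077}.
#   3. Otherwise: the default, "local[*]".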

function main(){
    log_info "Base directory set to $FWDIR"

    resolve_spark_master
    log_info "Spark master is $MASTER"

    log_info "Spark REPL options: $SPARK_REPL_OPTS"
    if $cygwin; then
        # Workaround for issue involving JLine and Cygwin
        # (see http://sourceforge.net/p/jline/bugs/40/).
        # If you're using the Mintty terminal emulator in Cygwin, you may need to set the
        # "Backspace sends ^H" setting in the "Keys" section of the Mintty options
        # (see https://github.com/sbt/sbt/issues/562).
        stty -icanon min 1 -echo > /dev/null 2>&1
        export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
        "$FWDIR"/bin/spark-class org.apache.spark.repl.Main "$@"
        stty icanon echo > /dev/null 2>&1
    else
        export SPARK_REPL_OPTS
        "$FWDIR"/bin/spark-class org.apache.spark.repl.Main "$@"
    fi
}

# Parse the command line. A while loop is used rather than "for option in $@"
# because shift inside a for loop does not affect the iteration list; each
# value-taking option consumes its argument with an explicit shift, and the
# setters exit with an error if the value is missing or malformed.
while [ $# -gt 0 ]; do
    option="$1"
    shift
    case "$option" in
        -h | --help)
            usage
            exit 0
            ;;
        -c | --cores)
            set_cores "$1" "-c/--cores"
            shift
            ;;
        -em | --executor-memory)
            set_em "$1" "-em/--executor-memory"
            shift
            ;;
        -dm | --driver-memory)
            set_dm "$1" "-dm/--driver-memory"
            shift
            ;;
        -m | --master)
            set_spark_master "$1" "-m/--master"
            shift
            ;;
        --log-conf)
            set_spark_log_conf "true"
            info_log=1
            ;;
        *)
            # Unrecognized options are ignored, as before.
            ;;
    esac
done
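# For illustration, after parsing "spark-shell -c 4 -em 2g --log-conf",
# SPARK_REPL_OPTS would contain:
#     -Dspark.cores.max=4 -Dspark.executor.memory=2g -Dspark.logConf=true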

# Restore-TTY-on-exit functions copied from the Scala REPL launcher script, so that
# spark-shell exits cleanly even in a binary distribution of Spark where Scala is
# not installed.
exit_status=127
saved_stty=""

# restore stty settings (echo in particular)
function restoreSttySettings() {
  stty "$saved_stty"
  saved_stty=""
}

function onExit() {
  if [[ "$saved_stty" != "" ]]; then
    restoreSttySettings
  fi
  exit $exit_status
}

# Re-enable echo if we are interrupted before completing.
trap onExit INT

# save terminal settings
saved_stty=$(stty -g 2>/dev/null)
# clear on error so we don't later try to restore them
if [[ ! $? ]]; then
  saved_stty=""
fi

main

# Record the exit status before it can be overwritten, then re-enable echo
# and propagate the code via onExit.
exit_status=$?
onExit