diff options
Diffstat (limited to 'bin')
-rwxr-xr-x  bin/slaves.sh         59
-rwxr-xr-x  bin/spark-config.sh   19
-rwxr-xr-x  bin/spark-daemon.sh  135
-rwxr-xr-x  bin/spark-daemons.sh  18
-rwxr-xr-x  bin/start-all.sh      17
-rwxr-xr-x  bin/start-master.sh   10
-rwxr-xr-x  bin/start-slaves.sh   16
-rwxr-xr-x  bin/stop-all.sh       14
-rwxr-xr-x  bin/stop-master.sh    10
-rwxr-xr-x  bin/stop-slaves.sh    10
10 files changed, 308 insertions, 0 deletions
#!/usr/bin/env bash
# ---- bin/slaves.sh ----
#
# Run a shell command on all slave hosts.
#
# Environment Variables
#   SPARK_SLAVES       File naming remote hosts.
#                      Default is ${SPARK_CONF_DIR}/slaves.
#   SPARK_CONF_DIR     Alternate conf dir. Default is ${SPARK_HOME}/conf.
#   SPARK_SLAVE_SLEEP  Seconds to sleep between spawning remote commands.
#   SPARK_SSH_OPTS     Options passed to ssh when running remote commands.
##

usage="Usage: slaves.sh [--config confdir] command..."

# If no command was specified, show usage and fail.
if [ $# -le 0 ]; then
  echo "$usage"
  exit 1
fi

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

. "$bin/spark-config.sh"

# If the slaves file is specified in the command line (via SPARK_SLAVES),
# it takes precedence over the definition in spark-env.sh, so save it
# before sourcing that file.
HOSTLIST="$SPARK_SLAVES"

if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
  . "${SPARK_CONF_DIR}/spark-env.sh"
fi

if [ "$HOSTLIST" = "" ]; then
  if [ "$SPARK_SLAVES" = "" ]; then
    export HOSTLIST="${SPARK_CONF_DIR}/slaves"
  else
    export HOSTLIST="${SPARK_SLAVES}"
  fi
fi

# By default disable strict host key checking
if [ "$SPARK_SSH_OPTS" = "" ]; then
  SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi

# Strip comments and blank lines from the host list, then run the
# command on each remaining host in parallel, prefixing every output
# line with the host name.
# NOTE(fix): a leftover debug `echo $"${@// /\\ }"` that printed the
# command to stdout before the loop was removed here.
for slave in $(sed "s/#.*$//;/^$/d" "$HOSTLIST"); do
  ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
    2>&1 | sed "s/^/$slave: /" &
  if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
    sleep "$SPARK_SLAVE_SLEEP"
  fi
done

wait

# ---- bin/spark-config.sh ----
# Included in all the spark scripts with the source command.
# Should not be executed directly, and should not be passed any
# arguments, since callers need their original $* intact.

# Resolve links: $0 (or BASH_SOURCE) may be a softlink.  cd -P / pwd -P
# yield the physical (symlink-free) absolute path, so the separate
# relative-to-absolute conversion stanza of the original was redundant
# and has been folded into this one.
this="${BASH_SOURCE-$0}"
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
script="$(basename -- "$this")"
this="$common_bin/$script"

# SPARK_PREFIX / SPARK_HOME is the parent of the bin directory.
export SPARK_PREFIX=$(dirname "$this")/..
export SPARK_HOME=${SPARK_PREFIX}
export SPARK_CONF_DIR="$SPARK_HOME/conf"
#!/usr/bin/env bash
# ---- bin/spark-daemon.sh ----
#
# Runs a Spark command as a daemon.
#
# Environment Variables
#   SPARK_CONF_DIR     Alternate conf dir. Default is ${SPARK_PREFIX}/conf.
#   SPARK_LOG_DIR      Where log files are stored.
#                      ${SPARK_HOME}/logs by default (the original header
#                      wrongly said PWD; the code below has always used
#                      $SPARK_HOME/logs).
#   SPARK_MASTER       host:path where spark code should be rsync'd from.
#   SPARK_PID_DIR      Where pid files are stored. /tmp by default.
#   SPARK_IDENT_STRING A string representing this instance of spark.
#                      $USER by default.
#   SPARK_NICENESS     The scheduling priority for daemons. Defaults to 0.
##

usage="Usage: spark-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <spark-command> <args...>"

# Need at least an action (start|stop) and a command class.
if [ $# -le 1 ]; then
  echo "$usage"
  exit 1
fi

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

. "$bin/spark-config.sh"

# get arguments
startStop=$1
shift
command=$1
shift

# Rotate $1, keeping up to $2 (default 5) old copies as $1.1 ... $1.N.
spark_rotate_log () {
  local log=$1
  local num=${2:-5}
  local prev
  if [ -f "$log" ]; then # rotate logs
    while [ "$num" -gt 1 ]; do
      prev=$((num - 1))
      [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
      num=$prev
    done
    mv "$log" "$log.$num"
  fi
}

if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
  . "${SPARK_CONF_DIR}/spark-env.sh"
fi

if [ "$SPARK_IDENT_STRING" = "" ]; then
  export SPARK_IDENT_STRING="$USER"
fi

# Get the log directory; create it and, if we cannot write to it,
# try to chown it to the daemon user.
if [ "$SPARK_LOG_DIR" = "" ]; then
  export SPARK_LOG_DIR="$SPARK_HOME/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch "$SPARK_LOG_DIR/.spark_test" > /dev/null 2>&1
TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
  rm -f "$SPARK_LOG_DIR/.spark_test"
else
  chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
fi

if [ "$SPARK_PID_DIR" = "" ]; then
  SPARK_PID_DIR=/tmp
fi

# Derived names for this daemon instance.
export SPARK_LOGFILE="spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.log"
export SPARK_ROOT_LOGGER="INFO,DRFA"
log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.out"
pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command.pid"

# Set default scheduling priority
if [ "$SPARK_NICENESS" = "" ]; then
  export SPARK_NICENESS=0
fi

case $startStop in

  (start)
    mkdir -p "$SPARK_PID_DIR"

    # Refuse to start a second copy of the same daemon.
    if [ -f "$pid" ]; then
      if kill -0 "$(cat "$pid")" > /dev/null 2>&1; then
        echo "$command running as process $(cat "$pid"). Stop it first."
        exit 1
      fi
    fi

    # Optionally sync the code tree from a master before starting.
    if [ "$SPARK_MASTER" != "" ]; then
      echo "rsync from $SPARK_MASTER"
      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "$SPARK_HOME"
    fi

    spark_rotate_log "$log"
    echo "starting $command, logging to $log"
    cd "$SPARK_PREFIX"
    # Detach fully: nohup, stdio redirected, record the pid.
    nohup nice -n "$SPARK_NICENESS" "$SPARK_PREFIX"/run "$command" "$@" > "$log" 2>&1 < /dev/null &
    echo $! > "$pid"
    # Give the daemon a moment to start, then show the head of its log.
    sleep 1; head "$log"
    ;;

  (stop)
    if [ -f "$pid" ]; then
      if kill -0 "$(cat "$pid")" > /dev/null 2>&1; then
        echo "stopping $command"
        kill "$(cat "$pid")"
      else
        echo "no $command to stop"
      fi
    else
      echo "no $command to stop"
    fi
    ;;

  (*)
    echo "$usage"
    exit 1
    ;;

esac

#!/usr/bin/env bash
# ---- bin/spark-daemons.sh ----
# Run a Spark daemon command (start|stop) on all slave hosts.

# FIX: usage previously showed "[start|stop]" as if optional; the
# action is required, so it is written as "(start|stop)".
usage="Usage: spark-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop) command args..."

# Need at least an action and a command.
if [ $# -le 1 ]; then
  echo "$usage"
  exit 1
fi

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

. "$bin/spark-config.sh"

# On every slave: cd into SPARK_HOME, then invoke spark-daemon.sh with
# the same arguments given to this script.
exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/spark-daemon.sh" "$@"

#!/usr/bin/env bash
# ---- bin/start-all.sh ----
# Start all spark daemons:
#   - the master on this node,
#   - a worker on each node specified in conf/slaves.

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

# Load the Spark configuration
. "$bin/spark-config.sh"

# Start Master
"$bin"/start-master.sh --config "$SPARK_CONF_DIR"

# Start Workers
"$bin"/start-slaves.sh --config "$SPARK_CONF_DIR"
#!/usr/bin/env bash
# ---- bin/start-master.sh ----
# Starts the master on the machine this script is executed on.

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

# Pull in SPARK_HOME / SPARK_CONF_DIR / SPARK_PREFIX.
. "$bin/spark-config.sh"

# Launch the master class as a local daemon.
"$bin"/spark-daemon.sh start spark.deploy.master.Master
#!/usr/bin/env bash
# ---- bin/start-slaves.sh ----
# Start a worker daemon on every host in conf/slaves, pointing the
# workers at the master running on this machine.
#
# Environment Variables
#   SPARK_MASTER_PORT  Port the master listens on. Default 7077.
#   SPARK_MASTER_IP    Address workers should connect to. Defaults to
#                      this host's address as reported by `host`.

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

. "$bin/spark-config.sh"

# Find the port number for the master
if [ "$SPARK_MASTER_PORT" = "" ]; then
  SPARK_MASTER_PORT=7077
fi

# Work out the master's address.  Parsing `host` output with cut is
# fragile (output format varies between resolvers and the field is
# garbage on lookup failure), so allow an explicit SPARK_MASTER_IP
# override and fall back to the bare hostname when the lookup yields
# nothing.
if [ "$SPARK_MASTER_IP" != "" ]; then
  ip="$SPARK_MASTER_IP"
else
  hostname=$(hostname)
  ip=$(host "$hostname" | cut -d " " -f 4)
  if [ "$ip" = "" ]; then
    ip="$hostname"
  fi
fi

"$bin"/spark-daemons.sh start spark.deploy.worker.Worker "spark://$ip:$SPARK_MASTER_PORT"
#!/usr/bin/env bash
# ---- bin/stop-all.sh ----
# Stop all spark daemons.  Run this on the master node.
# (FIX: the original comment said "Start all" and "master nde".)

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

# Load the Spark configuration
. "$bin/spark-config.sh"

# Stop the slaves, then the master
"$bin"/stop-slaves.sh
"$bin"/stop-master.sh

#!/usr/bin/env bash
# ---- bin/stop-master.sh ----
# Stops the master on the machine this script is executed on.

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

. "$bin/spark-config.sh"

# BUG FIX: this previously ran
#   spark-daemon.sh stop spark.deploy.worker.Worker
# i.e. it stopped a *worker*, not the master (the class was swapped
# with stop-slaves.sh).  The master script must stop the Master class
# that start-master.sh launched.
"$bin"/spark-daemon.sh stop spark.deploy.master.Master
#!/usr/bin/env bash
# ---- bin/stop-slaves.sh ----
# Stops a worker daemon on every host in conf/slaves.
# (FIX: the original comment said "Starts the master".)

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

. "$bin/spark-config.sh"

# BUG FIX: this previously ran
#   spark-daemon.sh stop spark.deploy.master.Master
# i.e. it stopped the *local master* instead of the remote workers
# (classes swapped with stop-master.sh).  Mirror start-slaves.sh:
# use spark-daemons.sh to stop the Worker class on every slave.
"$bin"/spark-daemons.sh stop spark.deploy.worker.Worker
\ No newline at end of file |