path: root/bin
diff options
author Denny <dennybritz@gmail.com> 2012-08-01 13:17:31 -0700
committer Denny <dennybritz@gmail.com> 2012-08-01 20:38:52 -0700
commit 0ee44c225e38abbf3382be6e9555ab9a35424a54 (patch)
tree 9fc106518bf55fc27083463843e9ee591c63ee59 /bin
parent 545165e8153bb516d4364f9b3df1440c6b44c01b (diff)
Spark standalone mode cluster scripts.
Heavily inspired by Hadoop cluster scripts ;-)
Diffstat (limited to 'bin')
10 files changed, 308 insertions, 0 deletions
diff --git a/bin/slaves.sh b/bin/slaves.sh
new file mode 100755
index 0000000000..e0dd883358
--- /dev/null
+++ b/bin/slaves.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+# Run a shell command on all slave hosts.
+# Environment Variables
+# SPARK_SLAVES File naming remote hosts.
+# Default is ${SPARK_CONF_DIR}/slaves.
+# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_HOME}/conf.
+# SPARK_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+# SPARK_SSH_OPTS Options passed to ssh when running remote commands.
+usage="Usage: slaves.sh [--config confdir] command..."
+# if no args specified, show usage
+if [ $# -le 0 ]; then
+ echo $usage
+ exit 1
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin/spark-config.sh"
+# If the slaves file is specified in the command line,
+# then it takes precedence over the definition in
+# spark-env.sh. Save it here.
+if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
+ . "${SPARK_CONF_DIR}/spark-env.sh"
+if [ "$HOSTLIST" = "" ]; then
+ if [ "$SPARK_SLAVES" = "" ]; then
+ export HOSTLIST="${SPARK_CONF_DIR}/slaves"
+ else
+ fi
+echo $"${@// /\\ }"
+# By default disable strict host key checking
+if [ "$SPARK_SSH_OPTS" = "" ]; then
+ SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
+for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
+ ssh $SPARK_SSH_OPTS $slave $"${@// /\\ }" \
+ 2>&1 | sed "s/^/$slave: /" &
+ if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
+ fi
diff --git a/bin/spark-config.sh b/bin/spark-config.sh
new file mode 100755
index 0000000000..d4b6558866
--- /dev/null
+++ b/bin/spark-config.sh
@@ -0,0 +1,19 @@
+# included in all the spark scripts with source command
+# should not be executable directly
+# also should not be passed any arguments, since we need original $*
+# resolve links - $0 may be a softlink
+common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
+script="$(basename -- "$this")"
+# convert relative path to absolute path
+config_bin=`dirname "$this"`
+script=`basename "$this"`
+config_bin=`cd "$config_bin"; pwd`
+export SPARK_PREFIX=`dirname "$this"`/..
+export SPARK_CONF_DIR="$SPARK_HOME/conf" \ No newline at end of file
diff --git a/bin/spark-daemon.sh b/bin/spark-daemon.sh
new file mode 100755
index 0000000000..b5ecd9c1a2
--- /dev/null
+++ b/bin/spark-daemon.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+# Runs a Spark command as a daemon.
+# Environment Variables
+# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_HOME}/conf.
+# SPARK_LOG_DIR Where log files are stored. ${SPARK_HOME}/logs by default.
+# SPARK_MASTER host:path where spark code should be rsync'd from
+# SPARK_PID_DIR Where the pid files are stored. /tmp by default.
+# SPARK_IDENT_STRING A string representing this instance of spark. $USER by default
+# SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
+usage="Usage: spark-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <spark-command> <args...>"
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+ echo $usage
+ exit 1
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin/spark-config.sh"
+# get arguments
+spark_rotate_log ()
+ log=$1;
+ num=5;
+ if [ -n "$2" ]; then
+ num=$2
+ fi
+ if [ -f "$log" ]; then # rotate logs
+ while [ $num -gt 1 ]; do
+ prev=`expr $num - 1`
+ [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+ num=$prev
+ done
+ mv "$log" "$log.$num";
+ fi
+if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
+ . "${SPARK_CONF_DIR}/spark-env.sh"
+if [ "$SPARK_IDENT_STRING" = "" ]; then
+# get log directory
+if [ "$SPARK_LOG_DIR" = "" ]; then
+ export SPARK_LOG_DIR="$SPARK_HOME/logs"
+mkdir -p "$SPARK_LOG_DIR"
+touch $SPARK_LOG_DIR/.spark_test > /dev/null 2>&1
+if [ "${TEST_LOG_DIR}" = "0" ]; then
+ rm -f $SPARK_LOG_DIR/.spark_test
+if [ "$SPARK_PID_DIR" = "" ]; then
+# some variables
+# Set default scheduling priority
+if [ "$SPARK_NICENESS" = "" ]; then
+case $startStop in
+ (start)
+ mkdir -p "$SPARK_PID_DIR"
+ if [ -f $pid ]; then
+ if kill -0 `cat $pid` > /dev/null 2>&1; then
+ echo $command running as process `cat $pid`. Stop it first.
+ exit 1
+ fi
+ fi
+ if [ "$SPARK_MASTER" != "" ]; then
+ echo rsync from $SPARK_MASTER
+ rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $SPARK_MASTER/ "$SPARK_HOME"
+ fi
+ spark_rotate_log $log
+ echo starting $command, logging to $log
+ nohup nice -n $SPARK_NICENESS "$SPARK_PREFIX"/run $command "$@" > "$log" 2>&1 < /dev/null &
+ echo $! > $pid
+ sleep 1; head "$log"
+ ;;
+ (stop)
+ if [ -f $pid ]; then
+ if kill -0 `cat $pid` > /dev/null 2>&1; then
+ echo stopping $command
+ kill `cat $pid`
+ else
+ echo no $command to stop
+ fi
+ else
+ echo no $command to stop
+ fi
+ ;;
+ (*)
+ echo $usage
+ exit 1
+ ;;
diff --git a/bin/spark-daemons.sh b/bin/spark-daemons.sh
new file mode 100755
index 0000000000..4f9719ee80
--- /dev/null
+++ b/bin/spark-daemons.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+# Run a Spark command on all slave hosts.
+usage="Usage: spark-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+ echo $usage
+ exit 1
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin/spark-config.sh"
+exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/spark-daemon.sh" "$@"
diff --git a/bin/start-all.sh b/bin/start-all.sh
new file mode 100755
index 0000000000..9bd6c50654
--- /dev/null
+++ b/bin/start-all.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Start all spark daemons.
+# Starts the master on this node.
+# Starts a worker on each node specified in conf/slaves
+bin=$(dirname "$0")
+bin=$(cd "$bin"; pwd)
+# Load the Spark configuration
+. "$bin/spark-config.sh"
+# SPARK_CONF_DIR is quoted so a conf path containing spaces stays one argument.
+# Start Master
+"$bin"/start-master.sh --config "$SPARK_CONF_DIR"
+# Start Workers
+"$bin"/start-slaves.sh --config "$SPARK_CONF_DIR"
\ No newline at end of file
diff --git a/bin/start-master.sh b/bin/start-master.sh
new file mode 100755
index 0000000000..6403c944a4
--- /dev/null
+++ b/bin/start-master.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Starts the master on the machine this script is executed on.
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin/spark-config.sh"
+"$bin"/spark-daemon.sh start spark.deploy.master.Master \ No newline at end of file
diff --git a/bin/start-slaves.sh b/bin/start-slaves.sh
new file mode 100755
index 0000000000..eb7663101b
--- /dev/null
+++ b/bin/start-slaves.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin/spark-config.sh"
+# Find the port number for the master
+if [ "$SPARK_MASTER_PORT" = "" ]; then
+ip=`host "$hostname" | cut -d " " -f 4`
+"$bin"/spark-daemons.sh start spark.deploy.worker.Worker spark://$ip:$SPARK_MASTER_PORT \ No newline at end of file
diff --git a/bin/stop-all.sh b/bin/stop-all.sh
new file mode 100755
index 0000000000..d352f6f631
--- /dev/null
+++ b/bin/stop-all.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Stop all spark daemons.
+# Run this on the master node
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+# Load the Spark configuration
+. "$bin/spark-config.sh"
+# Stop the slaves, then the master
diff --git a/bin/stop-master.sh b/bin/stop-master.sh
new file mode 100755
index 0000000000..f33f71664e
--- /dev/null
+++ b/bin/stop-master.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Stops the master on the machine this script is executed on.
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin/spark-config.sh"
+# Stop the same class that start-master.sh launches.
+"$bin"/spark-daemon.sh stop spark.deploy.master.Master
\ No newline at end of file
diff --git a/bin/stop-slaves.sh b/bin/stop-slaves.sh
new file mode 100755
index 0000000000..f75167dd2c
--- /dev/null
+++ b/bin/stop-slaves.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Stops the worker on each slave host.
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin/spark-config.sh"
+# Mirror start-slaves.sh: fan out through spark-daemons.sh and target the
+# Worker class, not the local Master.
+"$bin"/spark-daemons.sh stop spark.deploy.worker.Worker
\ No newline at end of file