author     kalpit <shahkalpit84@gmail.com>    2013-03-06 18:06:32 -0800
committer  kalpit <shahkalpit84@gmail.com>    2013-03-26 17:49:30 -0700
commit     f08db010d3ba4eff132f5c06a087b0c7c2e58576 (patch)
tree       6b08dfbbcb05594b7ce2d712d33e105bf9bd214c
parent     f0164e5047f3a023768cdf0515a7a7d1c75ef7c3 (diff)
added SPARK_WORKER_INSTANCES: allows spawning multiple worker instances/processes on every slave machine
-rwxr-xr-x  bin/spark-daemon.sh           6
-rwxr-xr-x  bin/spark-daemons.sh          2
-rwxr-xr-x  bin/start-master.sh           2
-rwxr-xr-x  bin/start-slave.sh            2
-rwxr-xr-x  bin/start-slaves.sh          11
-rwxr-xr-x  bin/stop-master.sh            2
-rwxr-xr-x  bin/stop-slaves.sh           12
-rwxr-xr-x  conf/spark-env.sh.template    1
8 files changed, 30 insertions, 8 deletions
diff --git a/bin/spark-daemon.sh b/bin/spark-daemon.sh
index 0c584055c7..d7dc62ab08 100755
--- a/bin/spark-daemon.sh
+++ b/bin/spark-daemon.sh
@@ -30,7 +30,7 @@
# SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
##
-usage="Usage: spark-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <spark-command> <args...>"
+usage="Usage: spark-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <spark-command> <spark-instance-number> <args...>"
# if no args specified, show usage
if [ $# -le 1 ]; then
@@ -48,6 +48,8 @@ startStop=$1
shift
command=$1
shift
+instance=$1
+shift
spark_rotate_log ()
{
@@ -95,7 +97,7 @@ fi
export SPARK_LOGFILE=spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.log
export SPARK_ROOT_LOGGER="INFO,DRFA"
log=$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.out
-pid=$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command.pid
+pid=$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid
# Set default scheduling priority
if [ "$SPARK_NICENESS" = "" ]; then
diff --git a/bin/spark-daemons.sh b/bin/spark-daemons.sh
index 4f9719ee80..0619097e4d 100755
--- a/bin/spark-daemons.sh
+++ b/bin/spark-daemons.sh
@@ -2,7 +2,7 @@
# Run a Spark command on all slave hosts.
-usage="Usage: spark-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
+usage="Usage: spark-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command instance-number args..."
# if no args specified, show usage
if [ $# -le 1 ]; then
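Since spark-daemons.sh runs the same spark-daemon.sh command on all slave hosts, the instance number simply rides along with the rest of the arguments. A usage sketch (instance 2 is illustrative):

  # stop only the second worker instance on every slave host
  ./bin/spark-daemons.sh stop spark.deploy.worker.Worker 2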
diff --git a/bin/start-master.sh b/bin/start-master.sh
index 87feb261fe..83a3e1f3dc 100755
--- a/bin/start-master.sh
+++ b/bin/start-master.sh
@@ -32,4 +32,4 @@ if [ "$SPARK_PUBLIC_DNS" = "" ]; then
fi
fi
-"$bin"/spark-daemon.sh start spark.deploy.master.Master --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT
+"$bin"/spark-daemon.sh start spark.deploy.master.Master 1 --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT
diff --git a/bin/start-slave.sh b/bin/start-slave.sh
index 45a0cf7a6b..616c76e4ee 100755
--- a/bin/start-slave.sh
+++ b/bin/start-slave.sh
@@ -11,4 +11,4 @@ if [ "$SPARK_PUBLIC_DNS" = "" ]; then
fi
fi
-"$bin"/spark-daemon.sh start spark.deploy.worker.Worker $1
+"$bin"/spark-daemon.sh start spark.deploy.worker.Worker "$@"
diff --git a/bin/start-slaves.sh b/bin/start-slaves.sh
index 390247ca4a..4e05224190 100755
--- a/bin/start-slaves.sh
+++ b/bin/start-slaves.sh
@@ -21,4 +21,13 @@ fi
echo "Master IP: $SPARK_MASTER_IP"
# Launch the slaves
-exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+ exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" 1 spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT
+else
+ if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
+ SPARK_WORKER_WEBUI_PORT=8081
+ fi
+ for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+ "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" $(( $i + 1 )) spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT --webui-port $(( $SPARK_WORKER_WEBUI_PORT + $i ))
+ done
+fi
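For instance, with SPARK_WORKER_INSTANCES=3 and the default SPARK_WORKER_WEBUI_PORT of 8081 set above, the loop is roughly equivalent to:

  "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" 1 spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT --webui-port 8081
  "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" 2 spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT --webui-port 8082
  "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/start-slave.sh" 3 spark://$SPARK_MASTER_IP:$SPARK_MASTER_PORT --webui-port 8083

so each worker instance on a host gets a distinct instance number and a distinct web UI port.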
diff --git a/bin/stop-master.sh b/bin/stop-master.sh
index f75167dd2c..172ee5891d 100755
--- a/bin/stop-master.sh
+++ b/bin/stop-master.sh
@@ -7,4 +7,4 @@ bin=`cd "$bin"; pwd`
. "$bin/spark-config.sh"
-"$bin"/spark-daemon.sh stop spark.deploy.master.Master \ No newline at end of file
+"$bin"/spark-daemon.sh stop spark.deploy.master.Master 1
diff --git a/bin/stop-slaves.sh b/bin/stop-slaves.sh
index 21c9ebf324..fbfc594472 100755
--- a/bin/stop-slaves.sh
+++ b/bin/stop-slaves.sh
@@ -7,4 +7,14 @@ bin=`cd "$bin"; pwd`
. "$bin/spark-config.sh"
-"$bin"/spark-daemons.sh stop spark.deploy.worker.Worker \ No newline at end of file
+if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
+ . "${SPARK_CONF_DIR}/spark-env.sh"
+fi
+
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+ "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker 1
+else
+ for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+ "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker $(( $i + 1 ))
+ done
+fi
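Stopping mirrors starting: spark-env.sh is sourced so the same SPARK_WORKER_INSTANCES value is seen, and each instance number is stopped in turn so every per-instance pid file is matched. With SPARK_WORKER_INSTANCES=3, the loop above effectively runs:

  "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker 1
  "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker 2
  "$bin"/spark-daemons.sh stop spark.deploy.worker.Worker 3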
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 6d71ec5691..37565ca827 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -12,6 +12,7 @@
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
+# - SPARK_WORKER_INSTANCES, to set the number of worker instances/processes to be spawned on every slave machine
#
# Finally, Spark also relies on the following variables, but these can be set
# on just the *master* (i.e. in your driver program), and will automatically
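Putting it together, a minimal spark-env.sh sketch for running two workers per slave might look like the following (the values and host name are illustrative, not defaults from this patch; only SPARK_WORKER_INSTANCES and SPARK_WORKER_WEBUI_PORT are read by the new start/stop logic):

  # conf/spark-env.sh (copied from conf/spark-env.sh.template on every machine)
  export SPARK_MASTER_IP=spark-master        # illustrative master host
  export SPARK_WORKER_INSTANCES=2            # spawn two Worker processes per slave
  export SPARK_WORKER_WEBUI_PORT=8081        # instance 1 -> 8081, instance 2 -> 8082

  # then, from the master machine:
  ./bin/start-slaves.sh        # launches two workers on every slave
  ./bin/stop-slaves.sh         # stops both instances again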