aboutsummaryrefslogtreecommitdiff
path: root/sbin/start-slave.sh
diff options
context:
space:
mode:
authorNathan Kronenfeld <nkronenfeld@oculusinfo.com>2015-04-13 18:21:16 -0700
committerAndrew Or <andrew@databricks.com>2015-04-13 18:21:16 -0700
commit435b8779df01a7477addecb1023605957bca4e9b (patch)
tree19bdc327afca005b5cec0005b0f9d0f78ce83233 /sbin/start-slave.sh
parent4898dfa464be55772e3f9db10c48adcb3cfc9a3d (diff)
downloadspark-435b8779df01a7477addecb1023605957bca4e9b.tar.gz
spark-435b8779df01a7477addecb1023605957bca4e9b.tar.bz2
spark-435b8779df01a7477addecb1023605957bca4e9b.zip
[Spark-4848] Allow different Worker configurations in standalone cluster
This refixes #3699 with the latest code. This fixes SPARK-4848. I've changed the stand-alone cluster scripts to allow different workers to have different numbers of instances, with both port and web-ui port following along appropriately. I did this by moving the loop over instances from start-slaves and stop-slaves (on the master) to start-slave and stop-slave (on the worker). While I was at it, I changed SPARK_WORKER_PORT to work the same way as SPARK_WORKER_WEBUI_PORT, since the new methods work fine for both. Author: Nathan Kronenfeld <nkronenfeld@oculusinfo.com> Closes #5140 from nkronenfeld/feature/spark-4848 and squashes the following commits: cf5f47e [Nathan Kronenfeld] Merge remote branch 'upstream/master' into feature/spark-4848 044ca6f [Nathan Kronenfeld] Documentation and formatting as requested by by andrewor14 d739640 [Nathan Kronenfeld] Move looping through instances from the master to the workers, so that each worker respects its own number of instances and web-ui port
Diffstat (limited to 'sbin/start-slave.sh')
-rwxr-xr-xsbin/start-slave.sh59
1 files changed, 56 insertions, 3 deletions
diff --git a/sbin/start-slave.sh b/sbin/start-slave.sh
index 5a6de11afd..4c919ff76a 100755
--- a/sbin/start-slave.sh
+++ b/sbin/start-slave.sh
@@ -18,15 +18,68 @@
#
# Starts a slave on the machine this script is executed on.
+#
+# Environment Variables
+#
+# SPARK_WORKER_INSTANCES The number of worker instances to run on this
+# slave. Default is 1.
+# SPARK_WORKER_PORT The base port number for the first worker. If set,
+# subsequent workers will increment this number. If
+# unset, Spark will find a valid port number, but
+# with no guarantee of a predictable pattern.
+# SPARK_WORKER_WEBUI_PORT The base port for the web interface of the first
+# worker. Subsequent workers will increment this
+# number. Default is 8081.
-usage="Usage: start-slave.sh <worker#> <spark-master-URL> where <spark-master-URL> is like spark://localhost:7077"
+usage="Usage: start-slave.sh <spark-master-URL> where <spark-master-URL> is like spark://localhost:7077"
-if [ $# -lt 2 ]; then
+if [ $# -lt 1 ]; then
echo $usage
+ echo Called as start-slave.sh $*
exit 1
fi
sbin="`dirname "$0"`"
sbin="`cd "$sbin"; pwd`"
-"$sbin"/spark-daemon.sh start org.apache.spark.deploy.worker.Worker "$@"
+. "$sbin/spark-config.sh"
+
+. "$SPARK_PREFIX/bin/load-spark-env.sh"
+
+# First argument should be the master; we need to store it aside because we may
+# need to insert arguments between it and the other arguments
+MASTER=$1
+shift
+
+# Determine desired worker port
+if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
+ SPARK_WORKER_WEBUI_PORT=8081
+fi
+
+# Start up the appropriate number of workers on this machine.
+# quick local function to start a worker
+function start_instance {
+ WORKER_NUM=$1
+ shift
+
+ if [ "$SPARK_WORKER_PORT" = "" ]; then
+ PORT_FLAG=
+ PORT_NUM=
+ else
+ PORT_FLAG="--port"
+ PORT_NUM=$(( $SPARK_WORKER_PORT + $WORKER_NUM - 1 ))
+ fi
+ WEBUI_PORT=$(( $SPARK_WORKER_WEBUI_PORT + $WORKER_NUM - 1 ))
+
+ "$sbin"/spark-daemon.sh start org.apache.spark.deploy.worker.Worker $WORKER_NUM \
+ --webui-port "$WEBUI_PORT" $PORT_FLAG $PORT_NUM $MASTER "$@"
+}
+
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+ start_instance 1 "$@"
+else
+ for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+ start_instance $(( 1 + $i )) "$@"
+ done
+fi
+