about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> 2014-09-25 16:49:15 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-09-25 16:49:15 -0700
commit: 0dc868e787a3bc69c1b8e90d916a6dcea8dbcd6d (patch)
tree: c5f16816cb9169916e8d697fd922b6d2fc521e98
parent: ff637c9380a6342fd0a4dde0710ec23856751dd4 (diff)
downloadspark-0dc868e787a3bc69c1b8e90d916a6dcea8dbcd6d.tar.gz
spark-0dc868e787a3bc69c1b8e90d916a6dcea8dbcd6d.tar.bz2
spark-0dc868e787a3bc69c1b8e90d916a6dcea8dbcd6d.zip
[SPARK-3584] sbin/slaves doesn't work when we use password authentication for SSH
Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp>

Closes #2444 from sarutak/slaves-scripts-modification and squashes the following commits:

eff7394 [Kousuke Saruta] Improve the description about Cluster Launch Script in docs/spark-standalone.md
7858225 [Kousuke Saruta] Modified sbin/slaves to use the environment variable "SPARK_SSH_FOREGROUND" as a flag
53d7121 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into slaves-scripts-modification
e570431 [Kousuke Saruta] Added a description for SPARK_SSH_FOREGROUND variable
7120a0c [Kousuke Saruta] Added a description about default host for sbin/slaves
1bba8a9 [Kousuke Saruta] Added SPARK_SSH_FOREGROUND flag to sbin/slaves
88e2f17 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into slaves-scripts-modification
297e75d [Kousuke Saruta] Modified sbin/slaves not to export HOSTLIST
-rw-r--r--  .gitignore                                    1
-rw-r--r--  .rat-excludes                                 1
-rw-r--r--  conf/slaves.template (renamed from conf/slaves)  0
-rw-r--r--  docs/spark-standalone.md                      7
-rwxr-xr-x  sbin/slaves.sh                                31
5 files changed, 30 insertions, 10 deletions
diff --git a/.gitignore b/.gitignore
index 7779980b74..34939e3a97 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ conf/*.cmd
conf/*.properties
conf/*.conf
conf/*.xml
+conf/slaves
docs/_site
docs/api
target/
diff --git a/.rat-excludes b/.rat-excludes
index 9fc99d7fca..b14ad53720 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -19,6 +19,7 @@ log4j.properties
log4j.properties.template
metrics.properties.template
slaves
+slaves.template
spark-env.sh
spark-env.cmd
spark-env.sh.template
diff --git a/conf/slaves b/conf/slaves.template
index da0a01343d..da0a01343d 100644
--- a/conf/slaves
+++ b/conf/slaves.template
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 29b5491861..58103fab20 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -62,7 +62,12 @@ Finally, the following configuration options can be passed to the master and wor
# Cluster Launch Scripts
-To launch a Spark standalone cluster with the launch scripts, you need to create a file called `conf/slaves` in your Spark directory, which should contain the hostnames of all the machines where you would like to start Spark workers, one per line. The master machine must be able to access each of the slave machines via password-less `ssh` (using a private key). For testing, you can just put `localhost` in this file.
+To launch a Spark standalone cluster with the launch scripts, you should create a file called conf/slaves in your Spark directory,
+which must contain the hostnames of all the machines where you intend to start Spark workers, one per line.
+If conf/slaves does not exist, the launch scripts defaults to a single machine (localhost), which is useful for testing.
+Note, the master machine accesses each of the worker machines via ssh. By default, ssh is run in parallel and requires password-less (using a private key) access to be setup.
+If you do not have a password-less setup, you can set the environment variable SPARK_SSH_FOREGROUND and serially provide a password for each worker.
+
Once you've set up this file, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/bin`:
diff --git a/sbin/slaves.sh b/sbin/slaves.sh
index 1d4dc5edf9..cdad47ee2e 100755
--- a/sbin/slaves.sh
+++ b/sbin/slaves.sh
@@ -44,7 +44,9 @@ sbin="`cd "$sbin"; pwd`"
# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# spark-env.sh. Save it here.
-HOSTLIST="$SPARK_SLAVES"
+if [ -f "$SPARK_SLAVES" ]; then
+ HOSTLIST=`cat "$SPARK_SLAVES"`
+fi
# Check if --config is passed as an argument. It is an optional parameter.
# Exit if the argument is not a directory.
@@ -67,23 +69,34 @@ fi
if [ "$HOSTLIST" = "" ]; then
if [ "$SPARK_SLAVES" = "" ]; then
- export HOSTLIST="${SPARK_CONF_DIR}/slaves"
+ if [ -f "${SPARK_CONF_DIR}/slaves" ]; then
+ HOSTLIST=`cat "${SPARK_CONF_DIR}/slaves"`
+ else
+ HOSTLIST=localhost
+ fi
else
- export HOSTLIST="${SPARK_SLAVES}"
+ HOSTLIST=`cat "${SPARK_SLAVES}"`
fi
fi
+
+
# By default disable strict host key checking
if [ "$SPARK_SSH_OPTS" = "" ]; then
SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi
-for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
- ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
- 2>&1 | sed "s/^/$slave: /" &
- if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
- sleep $SPARK_SLAVE_SLEEP
- fi
+for slave in `echo "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
+ if [ -n "${SPARK_SSH_FOREGROUND}" ]; then
+ ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
+ 2>&1 | sed "s/^/$slave: /"
+ else
+ ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
+ 2>&1 | sed "s/^/$slave: /" &
+ fi
+ if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
+ sleep $SPARK_SLAVE_SLEEP
+ fi
done
wait