author    Shivaram Venkataraman <shivaram@cs.berkeley.edu>    2015-06-03 15:14:38 -0700
committer Andrew Or <andrew@databricks.com>                   2015-06-03 15:14:38 -0700
commit    d3e026f8798f9875b90e8c372056ee3d71489be5
tree      7ceb7f8507247280f6aadf524fbfc42e359b1374 /ec2
parent    a8f1f1543e29fb2897e9ae6940581b9e4a3a13fb
[SPARK-3674] [EC2] Clear SPARK_WORKER_INSTANCES when using YARN
cc andrewor14

Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>

Closes #6424 from shivaram/spark-worker-instances-yarn-ec2 and squashes the following commits:

db244ae [Shivaram Venkataraman] Make Python Lint happy
0593d1b [Shivaram Venkataraman] Clear SPARK_WORKER_INSTANCES when using YARN
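In short, the patch gates the SPARK_WORKER_INSTANCES template variable on the Hadoop major version. A minimal sketch of the intended behavior (the helper function and asserts are illustrative, not code from the patch):

    # Illustrative condensation of the patch's logic into one function.
    def spark_worker_instances(hadoop_major_version, worker_instances):
        # YARN manages container counts itself, so the standalone-mode
        # worker count is cleared rather than deployed to the cluster.
        if hadoop_major_version == "yarn":
            return ""
        return "%d" % worker_instances

    assert spark_worker_instances("yarn", 4) == ""
    assert spark_worker_instances("2", 4) == "4"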
Diffstat (limited to 'ec2')
-rwxr-xr-x  ec2/spark_ec2.py  13
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index ee0904c9e5..84629cb9a0 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -219,7 +219,8 @@ def parse_args():
"(default: %default).")
parser.add_option(
"--hadoop-major-version", default="1",
- help="Major version of Hadoop (default: %default)")
+ help="Major version of Hadoop. Valid options are 1 (Hadoop 1.0.4), 2 (CDH 4.2.0), yarn " +
+ "(Hadoop 2.4.0) (default: %default)")
parser.add_option(
"-D", metavar="[ADDRESS:]PORT", dest="proxy_port",
help="Use SSH dynamic port forwarding to create a SOCKS proxy at " +
@@ -271,7 +272,8 @@ def parse_args():
help="Launch fresh slaves, but use an existing stopped master if possible")
parser.add_option(
"--worker-instances", type="int", default=1,
- help="Number of instances per worker: variable SPARK_WORKER_INSTANCES (default: %default)")
+ help="Number of instances per worker: variable SPARK_WORKER_INSTANCES. Not used if YARN " +
+ "is used as Hadoop major version (default: %default)")
parser.add_option(
"--master-opts", type="string", default="",
help="Extra options to give to master through SPARK_MASTER_OPTS variable " +
@@ -761,6 +763,10 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
if opts.ganglia:
modules.append('ganglia')
+ # Clear SPARK_WORKER_INSTANCES if running on YARN
+ if opts.hadoop_major_version == "yarn":
+ opts.worker_instances = ""
+
# NOTE: We should clone the repository before running deploy_files to
# prevent ec2-variables.sh from being overwritten
print("Cloning spark-ec2 scripts from {r}/tree/{b} on master...".format(
@@ -998,6 +1004,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
master_addresses = [get_dns_name(i, opts.private_ips) for i in master_nodes]
slave_addresses = [get_dns_name(i, opts.private_ips) for i in slave_nodes]
+ worker_instances_str = "%d" % opts.worker_instances if opts.worker_instances else ""
template_vars = {
"master_list": '\n'.join(master_addresses),
"active_master": active_master,
@@ -1011,7 +1018,7 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
"spark_version": spark_v,
"tachyon_version": tachyon_v,
"hadoop_major_version": opts.hadoop_major_version,
- "spark_worker_instances": "%d" % opts.worker_instances,
+ "spark_worker_instances": worker_instances_str,
"spark_master_opts": opts.master_opts
}
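Downstream, deploy_files substitutes template_vars into the spark-ec2 template files, so the empty value propagates as an empty SPARK_WORKER_INSTANCES. A rough sketch of that substitution, assuming the {{...}} placeholder style used by the spark-ec2 templates (the template line itself is illustrative):

    template = 'export SPARK_WORKER_INSTANCES="{{spark_worker_instances}}"'
    rendered = template.replace("{{spark_worker_instances}}", "")
    assert rendered == 'export SPARK_WORKER_INSTANCES=""'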