-rw-r--r--  ec2/deploy.generic/root/spark-ec2/ec2-variables.sh  | 11
-rwxr-xr-x  ec2/spark_ec2.py                                    | 33
2 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
new file mode 100644
index 0000000000..948cb5b1ec
--- /dev/null
+++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# These variables are automatically filled in by the mesos-ec2 script.
+export MESOS_MASTERS="{{master_list}}"
+export MESOS_SLAVES="{{slave_list}}"
+export MESOS_ZOO_LIST="{{zoo_list}}"
+export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
+export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
+export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
+export MODULES="{{modules}}"
+export SWAP="{{swap}}"
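
The `{{...}}` tokens above are placeholders that `deploy_files` in spark_ec2.py fills in before the file is shipped to the master. Below is a minimal sketch of that substitution, assuming a simple per-key string replacement; the `render` helper and the literal values are hypothetical, not the script's exact internals.

# Hypothetical sketch of the {{...}} substitution applied to templates
# such as ec2-variables.sh; helper name and values are illustrative.
def render(text, template_vars):
    # Replace each {{key}} token with its configured string value.
    for key, value in template_vars.items():
        text = text.replace("{{" + key + "}}", value)
    return text

template = 'export MODULES="{{modules}}"\nexport SWAP="{{swap}}"'
print(render(template, {"modules": "ephemeral-hdfs\nmesos", "swap": "1024"}))

Since the modules value is newline-joined (see the '\n'.join(modules) hunk below), the rendered MODULES assignment spans several lines inside one pair of quotes, which is still valid shell.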
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index a5384d3bda..f2385b5b56 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -358,25 +358,31 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
# Deploy configuration files and run setup scripts on a newly launched
# or started EC2 cluster.
def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_key):
- print "Deploying files to master..."
- deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, zoo_nodes)
+ if opts.cluster_type == "mesos":
+ modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mesos']
+ elif opts.cluster_type == "standalone":
+ modules = ['ephemeral-hdfs', 'persistent-hdfs', 'spark-standalone']
+
master = master_nodes[0].public_dns_name
if deploy_ssh_key:
print "Copying SSH key %s to master..." % opts.identity_file
ssh(master, opts, 'mkdir -p ~/.ssh')
scp(master, opts, opts.identity_file, '~/.ssh/id_rsa')
ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa')
+
+ # NOTE: We should clone the repository before running deploy_files to prevent
+ # ec2-variables.sh from being overwritten
+ ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/shivaram/spark-ec2.git")
+ print "Deploying files to master..."
+ deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes,
+ zoo_nodes, modules)
print "Running setup on master..."
- if opts.cluster_type == "mesos":
- setup_mesos_cluster(master, opts)
- elif opts.cluster_type == "standalone":
- setup_standalone_cluster(master, slave_nodes, opts)
+ setup_spark_cluster(master, opts)
print "Done!"
-def setup_mesos_cluster(master, opts):
- ssh(master, opts, "chmod u+x mesos-ec2/setup")
- ssh(master, opts, "mesos-ec2/setup %s %s %s %s" %
- ("generic", "none", "master", opts.swap))
+def setup_spark_cluster(master, opts):
+ ssh(master, opts, "chmod u+x spark-ec2/setup.sh")
+ ssh(master, opts, "spark-ec2/setup.sh")
def setup_standalone_cluster(master, slave_nodes, opts):
slave_ips = '\n'.join([i.public_dns_name for i in slave_nodes])
@@ -427,7 +433,8 @@ def get_num_disks(instance_type):
# cluster (e.g. lists of masters and slaves). Files are only deployed to
# the first master instance in the cluster, and we expect the setup
# script to be run on that instance to copy them to other nodes.
-def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes):
+def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes,
+ modules):
active_master = master_nodes[0].public_dns_name
num_disks = get_num_disks(opts.instance_type)
@@ -459,7 +466,9 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes):
"cluster_url": cluster_url,
"hdfs_data_dirs": hdfs_data_dirs,
"mapred_local_dirs": mapred_local_dirs,
- "spark_local_dirs": spark_local_dirs
+ "spark_local_dirs": spark_local_dirs,
+ "swap": str(opts.swap),
+ "modules": '\n'.join(modules)
}
# Create a temp directory in which we will place all the files to be
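
For illustration, a hedged sketch of the two values added to `template_vars` above: the `str()` cast suggests `opts.swap` is numeric, and the newline join yields one module name per line for the downstream setup scripts to read. Names and values below are hypothetical.

# Illustrative only: mirrors the two template values added above.
modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mesos']
opts_swap = 1024  # assumed numeric, hence the str() cast in the diff

template_vars = {
    "swap": str(opts_swap),         # substitution needs string values
    "modules": '\n'.join(modules),  # one module name per line
}
print(template_vars["modules"])
# ephemeral-hdfs
# persistent-hdfs
# mesos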