diff options
Diffstat (limited to 'ec2/spark_ec2.py')
-rwxr-xr-x | ec2/spark_ec2.py | 37 |
1 files changed, 25 insertions, 12 deletions
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 66b1faf2cd..7affe6fffc 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -35,7 +35,7 @@ from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType from boto import ec2 # A static URL from which to figure out the latest Mesos EC2 AMI -LATEST_AMI_URL = "https://s3.amazonaws.com/mesos-images/ids/latest-spark-0.6" +LATEST_AMI_URL = "https://s3.amazonaws.com/mesos-images/ids/latest-spark-0.7" # Configure and parse our command-line arguments @@ -83,16 +83,16 @@ def parse_args(): help="If specified, launch slaves as spot instances with the given " + "maximum price (in dollars)") parser.add_option("--cluster-type", type="choice", metavar="TYPE", - choices=["mesos", "standalone"], default="mesos", + choices=["mesos", "standalone"], default="standalone", help="'mesos' for a Mesos cluster, 'standalone' for a standalone " + - "Spark cluster (default: mesos)") + "Spark cluster (default: standalone)") parser.add_option("--ganglia", action="store_true", default=True, help="Setup Ganglia monitoring on cluster (default: on). NOTE: " + "the Ganglia page will be publicly accessible") parser.add_option("--no-ganglia", action="store_false", dest="ganglia", help="Disable Ganglia monitoring for the cluster") - parser.add_option("--new-scripts", action="store_true", default=False, - help="Use new spark-ec2 scripts, for Spark >= 0.7 AMIs") + parser.add_option("--old-scripts", action="store_true", default=False, + help="Use old mesos-ec2 scripts, for Spark <= 0.6 AMIs") parser.add_option("-u", "--user", default="root", help="The SSH user you want to connect as (default: root)") parser.add_option("--delete-groups", action="store_true", default=False, @@ -103,7 +103,7 @@ def parse_args(): parser.print_help() sys.exit(1) (action, cluster_name) = args - if opts.identity_file == None and action in ['launch', 'login']: + if opts.identity_file == None and action in ['launch', 'login', 'start']: print >> stderr, ("ERROR: The -i or --identity-file argument is " + "required for " + action) sys.exit(1) @@ -383,7 +383,7 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k if opts.ganglia: modules.append('ganglia') - if opts.new_scripts: + if not opts.old_scripts: # NOTE: We should clone the repository before running deploy_files to # prevent ec2-variables.sh from being overwritten ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git") @@ -393,7 +393,7 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k zoo_nodes, modules) print "Running setup on master..." - if not opts.new_scripts: + if opts.old_scripts: if opts.cluster_type == "mesos": setup_mesos_cluster(master, opts) elif opts.cluster_type == "standalone": @@ -540,11 +540,24 @@ def scp(host, opts, local_file, dest_file): (opts.identity_file, local_file, opts.user, host, dest_file), shell=True) -# Run a command on a host through ssh, throwing an exception if ssh fails +# Run a command on a host through ssh, retrying up to two times +# and then throwing an exception if ssh continues to fail. def ssh(host, opts, command): - subprocess.check_call( - "ssh -t -o StrictHostKeyChecking=no -i %s %s@%s '%s'" % - (opts.identity_file, opts.user, host, command), shell=True) + tries = 0 + while True: + try: + return subprocess.check_call( + "ssh -t -o StrictHostKeyChecking=no -i %s %s@%s '%s'" % + (opts.identity_file, opts.user, host, command), shell=True) + except subprocess.CalledProcessError as e: + if (tries > 2): + raise e + print "Error connecting to host {0}, sleeping 30".format(e) + time.sleep(30) + tries = tries + 1 + + + # Gets a list of zones to launch instances in |