diff options
author | Florian Verhein <florian.verhein@gmail.com> | 2015-02-09 23:47:07 +0000 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-02-09 23:47:07 +0000 |
commit | b884daa58084d4f42e2318894067565b94e07f9d (patch) | |
tree | f047764ae63bdb78f45baec499f0cd671b6cb819 /ec2 | |
parent | f48199eb354d6ec8675c2c1f96c3005064058d66 (diff) | |
download | spark-b884daa58084d4f42e2318894067565b94e07f9d.tar.gz spark-b884daa58084d4f42e2318894067565b94e07f9d.tar.bz2 spark-b884daa58084d4f42e2318894067565b94e07f9d.zip |
[SPARK-5611] [EC2] Allow spark-ec2 repo and branch to be set on CLI of spark_ec2.py
and, by extension, the location from which the ami-list is fetched (it is derived from the same repo and branch).
Useful for using alternate spark-ec2 repos or branches.
Author: Florian Verhein <florian.verhein@gmail.com>
Closes #4385 from florianverhein/master and squashes the following commits:
7e2b4be [Florian Verhein] [SPARK-5611] [EC2] typo
8b653dc [Florian Verhein] [SPARK-5611] [EC2] Enforce only supporting spark-ec2 forks from github, log improvement
bc4b0ed [Florian Verhein] [SPARK-5611] allow spark-ec2 repos with different names
8b5c551 [Florian Verhein] improve option naming, fix logging, fix lint failing, add guard to enforce spark-ec2
7724308 [Florian Verhein] [SPARK-5611] [EC2] fixes
b42b68c [Florian Verhein] [SPARK-5611] [EC2] Allow spark-ec2 repo and branch to be set on CLI of spark_ec2.py
Diffstat (limited to 'ec2')
-rwxr-xr-x | ec2/spark_ec2.py | 37 |
1 file changed, 32 insertions, 5 deletions
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 87b2112fe4..3e4c49c0e1 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -62,10 +62,10 @@ VALID_SPARK_VERSIONS = set([ DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark" -MESOS_SPARK_EC2_BRANCH = "branch-1.3" -# A URL prefix from which to fetch AMI information -AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/{b}/ami-list".format(b=MESOS_SPARK_EC2_BRANCH) +# Default location to get the spark-ec2 scripts (and ami-list) from +DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2" +DEFAULT_SPARK_EC2_BRANCH = "branch-1.3" def setup_boto(): @@ -148,6 +148,14 @@ def parse_args(): default=DEFAULT_SPARK_GITHUB_REPO, help="Github repo from which to checkout supplied commit hash (default: %default)") parser.add_option( + "--spark-ec2-git-repo", + default=DEFAULT_SPARK_EC2_GITHUB_REPO, + help="Github repo from which to checkout spark-ec2 (default: %default)") + parser.add_option( + "--spark-ec2-git-branch", + default=DEFAULT_SPARK_EC2_BRANCH, + help="Github repo branch of spark-ec2 to use (default: %default)") + parser.add_option( "--hadoop-major-version", default="1", help="Major version of Hadoop (default: %default)") parser.add_option( @@ -333,7 +341,12 @@ def get_spark_ami(opts): print >> stderr,\ "Don't recognize %s, assuming type is pvm" % opts.instance_type - ami_path = "%s/%s/%s" % (AMI_PREFIX, opts.region, instance_type) + # URL prefix from which to fetch AMI information + ami_prefix = "{r}/{b}/ami-list".format( + r=opts.spark_ec2_git_repo.replace("https://github.com", "https://raw.github.com", 1), + b=opts.spark_ec2_git_branch) + + ami_path = "%s/%s/%s" % (ami_prefix, opts.region, instance_type) try: ami = urllib2.urlopen(ami_path).read().strip() print "Spark AMI: " + ami @@ -650,12 +663,15 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): # NOTE: We should clone the repository before running deploy_files to # 
prevent ec2-variables.sh from being overwritten + print "Cloning spark-ec2 scripts from {r}/tree/{b} on master...".format( + r=opts.spark_ec2_git_repo, b=opts.spark_ec2_git_branch) ssh( host=master, opts=opts, command="rm -rf spark-ec2" + " && " - + "git clone https://github.com/mesos/spark-ec2.git -b {b}".format(b=MESOS_SPARK_EC2_BRANCH) + + "git clone {r} -b {b} spark-ec2".format(r=opts.spark_ec2_git_repo, + b=opts.spark_ec2_git_branch) ) print "Deploying files to master..." @@ -1038,6 +1054,17 @@ def real_main(): print >> stderr, "ebs-vol-num cannot be greater than 8" sys.exit(1) + # Prevent breaking ami_prefix (/, .git and startswith checks) + # Prevent forks with non spark-ec2 names for now. + if opts.spark_ec2_git_repo.endswith("/") or \ + opts.spark_ec2_git_repo.endswith(".git") or \ + not opts.spark_ec2_git_repo.startswith("https://github.com") or \ + not opts.spark_ec2_git_repo.endswith("spark-ec2"): + print >> stderr, "spark-ec2-git-repo must be a github repo and it must not have a " \ + "trailing / or .git. " \ + "Furthermore, we currently only support forks named spark-ec2." + sys.exit(1) + try: conn = ec2.connect_to_region(opts.region) except Exception as e: |