aboutsummaryrefslogtreecommitdiff
path: root/ec2
diff options
context:
space:
mode:
author: Nicholas Chammas <nicholas.chammas@gmail.com> 2015-03-10 10:58:31 +0000
committer: Sean Owen <sowen@cloudera.com> 2015-03-10 10:58:31 +0000
commit: d14df06c05a6228fd6522914c39aa75898eddfc1 (patch)
tree: 5e5cf41da06ee017762924e4bd4a991b113da4c3 /ec2
parent: c4c4b07bf61cab01d92fde4f902d8c06abdce240 (diff)
download: spark-d14df06c05a6228fd6522914c39aa75898eddfc1.tar.gz
spark-d14df06c05a6228fd6522914c39aa75898eddfc1.tar.bz2
spark-d14df06c05a6228fd6522914c39aa75898eddfc1.zip
[SPARK-6191] [EC2] Generalize ability to download libs
Right now we have a method to specifically download boto. This PR generalizes it so it's easy to download additional libraries if we want.

For example, adding new external libraries for spark-ec2 is now as simple as:

```python
external_libs = [
    {
        "name": "boto",
        "version": "2.34.0",
        "md5": "5556223d2d0cc4d06dd4829e671dcecd"
    },
    {
        "name": "PyYAML",
        "version": "3.11",
        "md5": "f50e08ef0fe55178479d3a618efe21db"
    },
    {
        "name": "argparse",
        "version": "1.3.0",
        "md5": "9bcf7f612190885c8c85e30ba41db3c7"
    }
]
```

Likely use cases:
* Downloading PyYAML to allow spark-ec2 configs to be persisted as a YAML file. ([SPARK-925](https://issues.apache.org/jira/browse/SPARK-925))
* Downloading argparse to clean up / modernize our option parsing.

First run output, with PyYAML and argparse added just for demonstration purposes:

```shell
$ ./spark-ec2 --version
Downloading external libraries that spark-ec2 needs from PyPI to /path/to/spark/ec2/lib...
This should be a one-time operation.
 - Downloading boto...
 - Finished downloading boto.
 - Downloading PyYAML...
 - Finished downloading PyYAML.
 - Downloading argparse...
 - Finished downloading argparse.
spark-ec2 1.2.1
```

Output thereafter:

```shell
$ ./spark-ec2 --version
spark-ec2 1.2.1
```

Author: Nicholas Chammas <nicholas.chammas@gmail.com>

Closes #4919 from nchammas/setup-ec2-libs and squashes the following commits:

a077955 [Nicholas Chammas] print default region
c95fb7d [Nicholas Chammas] to docstring
5448845 [Nicholas Chammas] remove libs added for demo purposes
60d8c23 [Nicholas Chammas] generalize ability to download libs
Diffstat (limited to 'ec2')
-rwxr-xr-x ec2/spark_ec2.py 82
1 files changed, 54 insertions, 28 deletions
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index b50b3816ff..3acb5fea04 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -70,34 +70,60 @@ DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2"
DEFAULT_SPARK_EC2_BRANCH = "branch-1.3"
-def setup_boto():
- # Download Boto if it's not already present in the SPARK_EC2_DIR/lib folder:
- version = "boto-2.34.0"
- md5 = "5556223d2d0cc4d06dd4829e671dcecd"
- url = "https://pypi.python.org/packages/source/b/boto/%s.tar.gz" % version
- lib_dir = os.path.join(SPARK_EC2_DIR, "lib")
- if not os.path.exists(lib_dir):
- os.mkdir(lib_dir)
- boto_lib_dir = os.path.join(lib_dir, version)
- if not os.path.isdir(boto_lib_dir):
- tgz_file_path = os.path.join(lib_dir, "%s.tar.gz" % version)
- print "Downloading Boto from PyPi"
- download_stream = urllib2.urlopen(url)
- with open(tgz_file_path, "wb") as tgz_file:
- tgz_file.write(download_stream.read())
- with open(tgz_file_path) as tar:
- if hashlib.md5(tar.read()).hexdigest() != md5:
- print >> stderr, "ERROR: Got wrong md5sum for Boto"
- sys.exit(1)
- tar = tarfile.open(tgz_file_path)
- tar.extractall(path=lib_dir)
- tar.close()
- os.remove(tgz_file_path)
- print "Finished downloading Boto"
- sys.path.insert(0, boto_lib_dir)
+def setup_external_libs(libs):
+ """
+ Download external libraries from PyPI to SPARK_EC2_DIR/lib/ and prepend them to our PATH.
+ """
+ PYPI_URL_PREFIX = "https://pypi.python.org/packages/source"
+ SPARK_EC2_LIB_DIR = os.path.join(SPARK_EC2_DIR, "lib")
+
+ if not os.path.exists(SPARK_EC2_LIB_DIR):
+ print "Downloading external libraries that spark-ec2 needs from PyPI to {path}...".format(
+ path=SPARK_EC2_LIB_DIR
+ )
+ print "This should be a one-time operation."
+ os.mkdir(SPARK_EC2_LIB_DIR)
+
+ for lib in libs:
+ versioned_lib_name = "{n}-{v}".format(n=lib["name"], v=lib["version"])
+ lib_dir = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name)
+
+ if not os.path.isdir(lib_dir):
+ tgz_file_path = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name + ".tar.gz")
+ print " - Downloading {lib}...".format(lib=lib["name"])
+ download_stream = urllib2.urlopen(
+ "{prefix}/{first_letter}/{lib_name}/{lib_name}-{lib_version}.tar.gz".format(
+ prefix=PYPI_URL_PREFIX,
+ first_letter=lib["name"][:1],
+ lib_name=lib["name"],
+ lib_version=lib["version"]
+ )
+ )
+ with open(tgz_file_path, "wb") as tgz_file:
+ tgz_file.write(download_stream.read())
+ with open(tgz_file_path) as tar:
+ if hashlib.md5(tar.read()).hexdigest() != lib["md5"]:
+ print >> stderr, "ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"])
+ sys.exit(1)
+ tar = tarfile.open(tgz_file_path)
+ tar.extractall(path=SPARK_EC2_LIB_DIR)
+ tar.close()
+ os.remove(tgz_file_path)
+ print " - Finished downloading {lib}.".format(lib=lib["name"])
+ sys.path.insert(1, lib_dir)
+
+
+# Only PyPI libraries are supported.
+external_libs = [
+ {
+ "name": "boto",
+ "version": "2.34.0",
+ "md5": "5556223d2d0cc4d06dd4829e671dcecd"
+ }
+]
+setup_external_libs(external_libs)
-setup_boto()
import boto
from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBSBlockDeviceType
from boto import ec2
@@ -136,7 +162,7 @@ def parse_args():
help="Master instance type (leave empty for same as instance-type)")
parser.add_option(
"-r", "--region", default="us-east-1",
- help="EC2 region used to launch instances in, or to find them in")
+ help="EC2 region used to launch instances in, or to find them in (default: %default)")
parser.add_option(
"-z", "--zone", default="",
help="Availability zone to launch instances in, or 'all' to spread " +
@@ -230,7 +256,7 @@ def parse_args():
"(e.g -Dspark.worker.timeout=180)")
parser.add_option(
"--user-data", type="string", default="",
- help="Path to a user-data file (most AMI's interpret this as an initialization script)")
+ help="Path to a user-data file (most AMIs interpret this as an initialization script)")
parser.add_option(
"--authorized-address", type="string", default="0.0.0.0/0",
help="Address to authorize on created security groups (default: %default)")