diff options
author | Nicholas Chammas <nicholas.chammas@gmail.com> | 2014-11-05 20:45:35 -0800 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2014-11-05 20:45:35 -0800 |
commit | db45f5ad0368760dbeaa618a04f66ae9b2bed656 (patch) | |
tree | 9e6916d5a46ea9d52d36e6862d37fc55d639df1b /ec2 | |
parent | 3d2b5bc5bb979d8b0b71e06bc0f4548376fdbb98 (diff) | |
download | spark-db45f5ad0368760dbeaa618a04f66ae9b2bed656.tar.gz spark-db45f5ad0368760dbeaa618a04f66ae9b2bed656.tar.bz2 spark-db45f5ad0368760dbeaa618a04f66ae9b2bed656.zip |
[SPARK-4137] [EC2] Don't change working dir on user
This issue was uncovered after [this discussion](https://issues.apache.org/jira/browse/SPARK-3398?focusedCommentId=14187471&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14187471).
Don't change the working directory on the user. Changing it breaks any relative paths the user passes in, e.g., for the SSH identity file.
```
./ec2/spark-ec2 -i ../my.pem
```
This patch will preserve the user's current working directory and allow calls like the one above to work.
Author: Nicholas Chammas <nicholas.chammas@gmail.com>
Closes #2988 from nchammas/spark-ec2-cwd and squashes the following commits:
f3850b5 [Nicholas Chammas] pep8 fix
fbc20c7 [Nicholas Chammas] revert to old commenting style
752f958 [Nicholas Chammas] specify deploy.generic path absolutely
bcdf6a5 [Nicholas Chammas] fix typo
77871a2 [Nicholas Chammas] add clarifying comment
ce071fc [Nicholas Chammas] don't change working dir
Diffstat (limited to 'ec2')
-rwxr-xr-x | ec2/spark-ec2 | 8 | ||||
-rwxr-xr-x | ec2/spark_ec2.py | 12 |
2 files changed, 17 insertions, 3 deletions
```diff
diff --git a/ec2/spark-ec2 b/ec2/spark-ec2
index 31f9771223..4aa908242e 100755
--- a/ec2/spark-ec2
+++ b/ec2/spark-ec2
@@ -18,5 +18,9 @@
 # limitations under the License.
 #
 
-cd "`dirname $0`"
-PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2.py "$@"
+# Preserve the user's CWD so that relative paths are passed correctly to
+#+ the underlying Python script.
+SPARK_EC2_DIR="$(dirname $0)"
+
+PYTHONPATH="${SPARK_EC2_DIR}/third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" \
+    python "${SPARK_EC2_DIR}/spark_ec2.py" "$@"
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 50f88f7356..a5396c2375 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -40,6 +40,7 @@ from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBS
 from boto import ec2
 
 DEFAULT_SPARK_VERSION = "1.1.0"
+SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))
 
 MESOS_SPARK_EC2_BRANCH = "v4"
 # A URL prefix from which to fetch AMI information
@@ -593,7 +594,14 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
     )
 
     print "Deploying files to master..."
-    deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)
+    deploy_files(
+        conn=conn,
+        root_dir=SPARK_EC2_DIR + "/" + "deploy.generic",
+        opts=opts,
+        master_nodes=master_nodes,
+        slave_nodes=slave_nodes,
+        modules=modules
+    )
 
     print "Running setup on master..."
     setup_spark_cluster(master, opts)
@@ -730,6 +738,8 @@ def get_num_disks(instance_type):
 # cluster (e.g. lists of masters and slaves). Files are only deployed to
 # the first master instance in the cluster, and we expect the setup
 # script to be run on that instance to copy them to other nodes.
+#
+# root_dir should be an absolute path to the directory with the files we want to deploy.
 def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
     active_master = master_nodes[0].public_dns_name
 
```