aboutsummaryrefslogtreecommitdiff
path: root/ec2
diff options
context:
space:
mode:
authorHolden Karau <holden@pigscanfly.ca>2013-04-06 00:35:50 -0700
committerHolden Karau <holden@pigscanfly.ca>2013-04-06 00:35:50 -0700
commitff2130a0ad17388036b66fcdf2b1848e208fa0f8 (patch)
tree31c11bbf9c6835bbc05f6ef906e873f430d1e6a1 /ec2
parent1f5381119f8c8afd0ba69bc7773c10972dd43bc1 (diff)
downloadspark-ff2130a0ad17388036b66fcdf2b1848e208fa0f8.tar.gz
spark-ff2130a0ad17388036b66fcdf2b1848e208fa0f8.tar.bz2
spark-ff2130a0ad17388036b66fcdf2b1848e208fa0f8.zip
Retry failed ssh commands. This is especially useful during system startup, when the hosts may not yet have come online, but it can also be useful at other times for people with flaky connections.
Diffstat (limited to 'ec2')
-rw-r--r--ec2/retry_decorator.py44
-rwxr-xr-xec2/spark_ec2.py2
2 files changed, 46 insertions, 0 deletions
diff --git a/ec2/retry_decorator.py b/ec2/retry_decorator.py
new file mode 100644
index 0000000000..1a2f79ae84
--- /dev/null
+++ b/ec2/retry_decorator.py
@@ -0,0 +1,44 @@
+import time
+from functools import wraps
+
+def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
+ """Retry calling the decorated function using an exponential backoff.
+
+ http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+ original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
+
+ :param ExceptionToCheck: the exception that triggers a retry; may be a
+ tuple of exceptions. Any other exception propagates immediately
+ :type ExceptionToCheck: Exception or tuple
+ :param tries: total number of attempts (not retries); the final attempt's exception propagates
+ :type tries: int
+ :param delay: initial delay between attempts, in seconds
+ :type delay: int
+ :param backoff: multiplier applied to the delay after each failed
+ attempt, e.g. a value of 2 doubles the delay each time
+ :type backoff: int
+ :param logger: logger whose warning() receives the retry message. If None, print
+ :type logger: logging.Logger instance
+ """
+ def deco_retry(f):
+
+ @wraps(f)
+ def f_retry(*args, **kwargs):
+ mtries, mdelay = tries, delay
+ while mtries > 1:  # guarded attempts; the last attempt is made after the loop
+ try:
+ return f(*args, **kwargs)
+ except ExceptionToCheck, e:
+ msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
+ if logger:
+ logger.warning(msg)
+ else:
+ print msg
+ time.sleep(mdelay)
+ mtries -= 1
+ mdelay *= backoff
+ return f(*args, **kwargs)  # final attempt: not wrapped in try, so any exception reaches the caller
+
+ return f_retry # the wrapped function that performs the retries
+
+ return deco_retry
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 571d27fde6..aa2d360fbb 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -22,6 +22,7 @@ from __future__ import with_statement
import logging
import os
import random
+from retry_decorator import retry
import shutil
import subprocess
import sys
@@ -541,6 +542,7 @@ def scp(host, opts, local_file, dest_file):
# Run a command on a host through ssh, throwing an exception if ssh fails
+@retry(subprocess.CalledProcessError, tries=3, delay=30)
def ssh(host, opts, command):
subprocess.check_call(
"ssh -t -o StrictHostKeyChecking=no -i %s %s@%s '%s'" %