From 14a1df65729a3c7e5fd54f79424626a540aef5d4 Mon Sep 17 00:00:00 2001
From: Grega Kespret <grega@celtra.com>
Date: Mon, 9 Dec 2013 10:39:02 +0100
Subject: Fix for spark.task.maxFailures not enforced correctly.

---
 .../org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala
index 94961790df..bf494aa64d 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala
@@ -529,10 +529,10 @@ private[spark] class ClusterTaskSetManager(
       addPendingTask(index)
       if (state != TaskState.KILLED) {
         numFailures(index) += 1
-        if (numFailures(index) > MAX_TASK_FAILURES) {
-          logError("Task %s:%d failed more than %d times; aborting job".format(
+        if (numFailures(index) >= MAX_TASK_FAILURES) {
+          logError("Task %s:%d failed %d times; aborting job".format(
             taskSet.id, index, MAX_TASK_FAILURES))
-          abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES))
+          abort("Task %s:%d failed %d times".format(taskSet.id, index, MAX_TASK_FAILURES))
         }
       }
     } else {
-- 
cgit v1.2.3


From 558af873340087cad79630ec5c498672c5ea3c4f Mon Sep 17 00:00:00 2001
From: Grega Kespret <grega@celtra.com>
Date: Tue, 10 Dec 2013 11:43:42 +0100
Subject: Fix tests.

---
 core/src/test/scala/org/apache/spark/DistributedSuite.scala             | 2 +-
 .../org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
index 480bac84f3..816cbee3e0 100644
--- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
@@ -122,7 +122,7 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter
       sc.parallelize(1 to 10, 10).foreach(x => println(x / 0))
     }
     assert(thrown.getClass === classOf[SparkException])
-    assert(thrown.getMessage.contains("more than 4 times"))
+    assert(thrown.getMessage.contains("failed 4 times"))
   }
 
   test("caching") {
diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala
index b97f2b19b5..29c4cc5d9c 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala
@@ -283,7 +283,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo
 
     // Fail the task MAX_TASK_FAILURES times, and check that the task set is aborted
     // after the last failure.
-    (0 until manager.MAX_TASK_FAILURES).foreach { index =>
+    (1 to manager.MAX_TASK_FAILURES).foreach { index =>
       val offerResult = manager.resourceOffer("exec1", "host1", 1, ANY)
       assert(offerResult != None,
         "Expect resource offer on iteration %s to return a task".format(index))
-- 
cgit v1.2.3


From d17c142615f7d0ff514a74779e433107659a78b9 Mon Sep 17 00:00:00 2001
From: Ewen Cheslack-Postava <me@ewencp.org>
Date: Mon, 16 Dec 2013 08:09:37 -0800
Subject: Force pseudo-tty allocation in spark-ec2 script.

ssh commands need the -t argument repeated twice if there is no local
tty, e.g. if the process running spark-ec2 uses nohup and the parent
process exits.
---
 ec2/spark_ec2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 1189232428..a2b0e7e7f4 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -589,7 +589,7 @@ def ssh(host, opts, command):
   while True:
     try:
       return subprocess.check_call(
-        ssh_command(opts) + ['-t', '%s@%s' % (opts.user, host), stringify_command(command)])
+        ssh_command(opts) + ['-t', '-t', '%s@%s' % (opts.user, host), stringify_command(command)])
     except subprocess.CalledProcessError as e:
       if (tries > 2):
         # If this was an ssh failure, provide the user with hints.
@@ -730,7 +730,7 @@ def real_main():
     if opts.proxy_port != None:
       proxy_opt = ['-D', opts.proxy_port]
     subprocess.check_call(
-        ssh_command(opts) + proxy_opt + ['-t', "%s@%s" % (opts.user, master)])
+        ssh_command(opts) + proxy_opt + ['-t', '-t', "%s@%s" % (opts.user, master)])
 
   elif action == "get-master":
     (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name)
-- 
cgit v1.2.3