From 38020961d101e792393855fd00d8e42f40713754 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Wed, 5 Feb 2014 23:37:07 -0800 Subject: Merge pull request #526 from tgravescs/yarn_client_stop_am_fix. Closes #526. spark on yarn - yarn-client mode doesn't always exit immediately https://spark-project.atlassian.net/browse/SPARK-1049 If you run in the yarn-client mode but you don't get all the workers you requested right away and then you exit your application, the application master stays around until it gets the number of workers you initially requested. This is a waste of resources. The AM should exit immediately upon the client going away. This fix simply checks to see if the driver closed while its waiting for the initial # of workers. Author: Thomas Graves == Merge branch commits == commit 03f40a62584b6bdd094ba91670cd4aa6afe7cd81 Author: Thomas Graves Date: Fri Jan 31 11:23:10 2014 -0600 spark on yarn - yarn-client mode doesn't always exit immediately --- .../src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala | 2 +- .../src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'yarn') diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala index 9fe4d64a0f..138c27910b 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala @@ -210,7 +210,7 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration, spar // Wait until all containers have finished // TODO: This is a bit ugly. Can we make it nicer? // TODO: Handle container failure - while(yarnAllocator.getNumWorkersRunning < args.numWorkers) { + while ((yarnAllocator.getNumWorkersRunning < args.numWorkers) && (!driverClosed)) { yarnAllocator.allocateContainers(math.max(args.numWorkers - yarnAllocator.getNumWorkersRunning, 0)) Thread.sleep(100) } diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala index 78353224fa..40600f38e5 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala @@ -193,7 +193,7 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration, spar // TODO: Handle container failure yarnAllocator.addResourceRequests(args.numWorkers) - while (yarnAllocator.getNumWorkersRunning < args.numWorkers) { + while ((yarnAllocator.getNumWorkersRunning < args.numWorkers) && (!driverClosed)) { yarnAllocator.allocateResources() Thread.sleep(100) } -- cgit v1.2.3