diff options
author | huangzhaowei <carlmartinmax@gmail.com> | 2015-01-07 08:10:42 -0600 |
---|---|---|
committer | Thomas Graves <tgraves@apache.org> | 2015-01-07 08:10:42 -0600 |
commit | 5fde66163fe460d6f64b145047f76cc4ee33601a (patch) | |
tree | 91975e5f97d343604fcd31d51a1765992b7323d3 /yarn/src | |
parent | e21acc1978a6f4a57ef2e08490692b0ffe05fa9e (diff) | |
download | spark-5fde66163fe460d6f64b145047f76cc4ee33601a.tar.gz spark-5fde66163fe460d6f64b145047f76cc4ee33601a.tar.bz2 spark-5fde66163fe460d6f64b145047f76cc4ee33601a.zip |
[YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA
Currently, yarn-client exits immediately when an HA change happens, no matter how many times the AM should retry.
The reason may be that the default final status only considered the sys.exit case, so yarn-client HA can't benefit from it.
So we should distinguish the default final status between client and cluster mode, because a default of SUCCEEDED may cause HA to fail in client mode, while a default of UNDEFINED may cause an error to be reported in cluster mode when sys.exit is used.
Author: huangzhaowei <carlmartinmax@gmail.com>
Closes #3771 from SaintBacchus/YarnHA and squashes the following commits:
c02bfcc [huangzhaowei] Improve the comment of the funciton 'getDefaultFinalStatus'
0e69924 [huangzhaowei] Bug fix: fix the yarn-client code to support HA
Diffstat (limited to 'yarn/src')
-rw-r--r-- | yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 9c77dff48d..618db7f908 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, @volatile private var exitCode = 0 @volatile private var unregistered = false @volatile private var finished = false - @volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED + @volatile private var finalStatus = getDefaultFinalStatus @volatile private var finalMsg: String = "" @volatile private var userClassThread: Thread = _ @@ -153,6 +153,20 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, } /** + * Set the default final application status for client mode to UNDEFINED to handle + * if YARN HA restarts the application so that it properly retries. Set the final + * status to SUCCEEDED in cluster mode to handle if the user calls System.exit + * from the application code. + */ + final def getDefaultFinalStatus() = { + if (isDriver) { + FinalApplicationStatus.SUCCEEDED + } else { + FinalApplicationStatus.UNDEFINED + } + } + + /** * unregister is used to completely unregister the application from the ResourceManager. * This means the ResourceManager will not retry the application attempt on your behalf if * a failure occurred. |