aboutsummaryrefslogtreecommitdiff
path: root/core/src/main/scala/org/apache
diff options
context:
space:
mode:
authorShixiong Zhu <shixiong@databricks.com>2016-03-28 16:23:29 -0700
committerAndrew Or <andrew@databricks.com>2016-03-28 16:23:29 -0700
commit34c0638ee6f05aef81d90594dd9b8e06006c2c7f (patch)
tree3beb0db18d83b6e0083b68f95a05ac5a8fca0e69 /core/src/main/scala/org/apache
parent328c71161bdae569a534dcd05e14ec485e895c5c (diff)
downloadspark-34c0638ee6f05aef81d90594dd9b8e06006c2c7f.tar.gz
spark-34c0638ee6f05aef81d90594dd9b8e06006c2c7f.tar.bz2
spark-34c0638ee6f05aef81d90594dd9b8e06006c2c7f.zip
[SPARK-14180][CORE] Fix a deadlock in CoarseGrainedExecutorBackend Shutdown
## What changes were proposed in this pull request? Call `executor.stop` in a new thread to eliminate deadlock. ## How was this patch tested? Existing unit tests Author: Shixiong Zhu <shixiong@databricks.com> Closes #12012 from zsxwing/SPARK-14180.
Diffstat (limited to 'core/src/main/scala/org/apache')
-rw-r--r--core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala12
1 files changed, 9 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index 320a20033d..81e41e6fa7 100644
--- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@ -113,9 +113,15 @@ private[spark] class CoarseGrainedExecutorBackend(
case Shutdown =>
stopping.set(true)
- executor.stop()
- stop()
- rpcEnv.shutdown()
+ new Thread("CoarseGrainedExecutorBackend-stop-executor") {
+ override def run(): Unit = {
+ // executor.stop() will call `SparkEnv.stop()` which waits until RpcEnv stops totally.
+ // However, if `executor.stop()` runs in some thread of RpcEnv, RpcEnv won't be able to
+ // stop until `executor.stop()` returns, which becomes a dead-lock (See SPARK-14180).
+ // Therefore, we put this line in a new thread.
+ executor.stop()
+ }
+ }.start()
}
override def onDisconnected(remoteAddress: RpcAddress): Unit = {