aboutsummaryrefslogtreecommitdiff
path: root/core/src
diff options
context:
space:
mode:
authorRyan Blue <blue@apache.org>2016-11-02 00:08:30 -0700
committerReynold Xin <rxin@databricks.com>2016-11-02 00:08:30 -0700
commit2dc048081668665f85623839d5f663b402e42555 (patch)
tree19a6cc009b1dcd8b317a951b7ad6ba6cd8e232bd /core/src
parent85c5424d466f4a5765c825e0e2ab30da97611285 (diff)
downloadspark-2dc048081668665f85623839d5f663b402e42555.tar.gz
spark-2dc048081668665f85623839d5f663b402e42555.tar.bz2
spark-2dc048081668665f85623839d5f663b402e42555.zip
[SPARK-17532] Add lock debugging info to thread dumps.
## What changes were proposed in this pull request? This adds information to the web UI thread dump page about the JVM locks held by threads and the locks that threads are blocked waiting to acquire. This should help find cases where lock contention is causing Spark applications to run slowly. ## How was this patch tested? Tested by applying this patch and viewing the change in the web UI. ![thread-lock-info](https://cloud.githubusercontent.com/assets/87915/18493057/6e5da870-79c3-11e6-8c20-f54c18a37544.png) Additions: - A "Thread Locking" column with the locks held by the thread or that are blocking the thread - Links from the a blocked thread to the thread holding the lock - Stack frames show where threads are inside `synchronized` blocks, "holding Monitor(...)" Author: Ryan Blue <blue@apache.org> Closes #15088 from rdblue/SPARK-17532-add-thread-lock-info.
Diffstat (limited to 'core/src')
-rw-r--r--core/src/main/resources/org/apache/spark/ui/static/table.js3
-rw-r--r--core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala12
-rw-r--r--core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala6
-rw-r--r--core/src/main/scala/org/apache/spark/util/Utils.scala34
4 files changed, 49 insertions, 6 deletions
diff --git a/core/src/main/resources/org/apache/spark/ui/static/table.js b/core/src/main/resources/org/apache/spark/ui/static/table.js
index 14b06bfe86..0315ebf5c4 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/table.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/table.js
@@ -36,7 +36,7 @@ function toggleThreadStackTrace(threadId, forceAdd) {
if (stackTrace.length == 0) {
var stackTraceText = $('#' + threadId + "_td_stacktrace").html()
var threadCell = $("#thread_" + threadId + "_tr")
- threadCell.after("<tr id=\"" + threadId +"_stacktrace\" class=\"accordion-body\"><td colspan=\"3\"><pre>" +
+ threadCell.after("<tr id=\"" + threadId +"_stacktrace\" class=\"accordion-body\"><td colspan=\"4\"><pre>" +
stackTraceText + "</pre></td></tr>")
} else {
if (!forceAdd) {
@@ -73,6 +73,7 @@ function onMouseOverAndOut(threadId) {
$("#" + threadId + "_td_id").toggleClass("threaddump-td-mouseover");
$("#" + threadId + "_td_name").toggleClass("threaddump-td-mouseover");
$("#" + threadId + "_td_state").toggleClass("threaddump-td-mouseover");
+ $("#" + threadId + "_td_locking").toggleClass("threaddump-td-mouseover");
}
function onSearchStringChange() {
diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
index a0ef80d9bd..c6a07445f2 100644
--- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala
@@ -48,6 +48,16 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage
}
}.map { thread =>
val threadId = thread.threadId
+ val blockedBy = thread.blockedByThreadId match {
+ case Some(blockedByThreadId) =>
+ <div>
+ Blocked by <a href={s"#${thread.blockedByThreadId}_td_id"}>
+ Thread {thread.blockedByThreadId} {thread.blockedByLock}</a>
+ </div>
+ case None => Text("")
+ }
+ val heldLocks = thread.holdingLocks.mkString(", ")
+
<tr id={s"thread_${threadId}_tr"} class="accordion-heading"
onclick={s"toggleThreadStackTrace($threadId, false)"}
onmouseover={s"onMouseOverAndOut($threadId)"}
@@ -55,6 +65,7 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage
<td id={s"${threadId}_td_id"}>{threadId}</td>
<td id={s"${threadId}_td_name"}>{thread.threadName}</td>
<td id={s"${threadId}_td_state"}>{thread.threadState}</td>
+ <td id={s"${threadId}_td_locking"}>{blockedBy}{heldLocks}</td>
<td id={s"${threadId}_td_stacktrace"} class="hidden">{thread.stackTrace}</td>
</tr>
}
@@ -86,6 +97,7 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage
<th onClick="collapseAllThreadStackTrace(false)">Thread ID</th>
<th onClick="collapseAllThreadStackTrace(false)">Thread Name</th>
<th onClick="collapseAllThreadStackTrace(false)">Thread State</th>
+ <th onClick="collapseAllThreadStackTrace(false)">Thread Locks</th>
</thead>
<tbody>{dumpRows}</tbody>
</table>
diff --git a/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
index d4e0ad93b9..b1217980fa 100644
--- a/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala
@@ -24,4 +24,8 @@ private[spark] case class ThreadStackTrace(
threadId: Long,
threadName: String,
threadState: Thread.State,
- stackTrace: String)
+ stackTrace: String,
+ blockedByThreadId: Option[Long],
+ blockedByLock: String,
+ holdingLocks: Seq[String])
+
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 6027b07c0f..22c28fba20 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -18,7 +18,7 @@
package org.apache.spark.util
import java.io._
-import java.lang.management.ManagementFactory
+import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo}
import java.net._
import java.nio.ByteBuffer
import java.nio.channels.Channels
@@ -2096,15 +2096,41 @@ private[spark] object Utils extends Logging {
}
}
+ private implicit class Lock(lock: LockInfo) {
+ def lockString: String = {
+ lock match {
+ case monitor: MonitorInfo =>
+ s"Monitor(${lock.getClassName}@${lock.getIdentityHashCode}})"
+ case _ =>
+ s"Lock(${lock.getClassName}@${lock.getIdentityHashCode}})"
+ }
+ }
+ }
+
/** Return a thread dump of all threads' stacktraces. Used to capture dumps for the web UI */
def getThreadDump(): Array[ThreadStackTrace] = {
// We need to filter out null values here because dumpAllThreads() may return null array
// elements for threads that are dead / don't exist.
val threadInfos = ManagementFactory.getThreadMXBean.dumpAllThreads(true, true).filter(_ != null)
threadInfos.sortBy(_.getThreadId).map { case threadInfo =>
- val stackTrace = threadInfo.getStackTrace.map(_.toString).mkString("\n")
- ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName,
- threadInfo.getThreadState, stackTrace)
+ val monitors = threadInfo.getLockedMonitors.map(m => m.getLockedStackFrame -> m).toMap
+ val stackTrace = threadInfo.getStackTrace.map { frame =>
+ monitors.get(frame) match {
+ case Some(monitor) =>
+ monitor.getLockedStackFrame.toString + s" => holding ${monitor.lockString}"
+ case None =>
+ frame.toString
+ }
+ }.mkString("\n")
+
+ // use a set to dedup re-entrant locks that are held at multiple places
+ val heldLocks = (threadInfo.getLockedSynchronizers.map(_.lockString)
+ ++ threadInfo.getLockedMonitors.map(_.lockString)
+ ).toSet
+
+ ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName, threadInfo.getThreadState,
+ stackTrace, if (threadInfo.getLockOwnerId < 0) None else Some(threadInfo.getLockOwnerId),
+ Option(threadInfo.getLockInfo).map(_.lockString).getOrElse(""), heldLocks.toSeq)
}
}