aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorTathagata Das <tathagata.das1565@gmail.com>2015-09-22 22:44:09 -0700
committerTathagata Das <tathagata.das1565@gmail.com>2015-09-22 22:44:09 -0700
commit5548a254755bb84edae2768b94ab1816e1b49b91 (patch)
tree4f126c853f9b93a3b24fcc617a1c7c422cd7dc90 /core
parent558e9c7e60a7c0d85ba26634e97562ad2163e91d (diff)
downloadspark-5548a254755bb84edae2768b94ab1816e1b49b91.tar.gz
spark-5548a254755bb84edae2768b94ab1816e1b49b91.tar.bz2
spark-5548a254755bb84edae2768b94ab1816e1b49b91.zip
[SPARK-10652] [SPARK-10742] [STREAMING] Set meaningful job descriptions for all streaming jobs
Here is the screenshot after adding the job descriptions to threads that run receivers and the scheduler thread running the batch jobs. ## All jobs page * Added job descriptions with links to relevant batch details page ![image](https://cloud.githubusercontent.com/assets/663212/9924165/cda4a372-5cb1-11e5-91ca-d43a32c699e9.png) ## All stages page * Added stage descriptions with links to relevant batch details page ![image](https://cloud.githubusercontent.com/assets/663212/9923814/2cce266a-5cae-11e5-8a3f-dad84d06c50e.png) ## Streaming batch details page * Added the +details link ![image](https://cloud.githubusercontent.com/assets/663212/9921977/24014a32-5c98-11e5-958e-457b6c38065b.png) Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #8791 from tdas/SPARK-10652.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/ui/UIUtils.scala62
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala14
-rw-r--r--core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala7
-rw-r--r--core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala66
4 files changed, 137 insertions, 12 deletions
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index f2da417724..21dc8f0b65 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -18,9 +18,11 @@
package org.apache.spark.ui
import java.text.SimpleDateFormat
-import java.util.{Locale, Date}
+import java.util.{Date, Locale}
-import scala.xml.{Node, Text, Unparsed}
+import scala.util.control.NonFatal
+import scala.xml._
+import scala.xml.transform.{RewriteRule, RuleTransformer}
import org.apache.spark.Logging
import org.apache.spark.ui.scope.RDDOperationGraph
@@ -395,4 +397,60 @@ private[spark] object UIUtils extends Logging {
</script>
}
+ /**
+ * Returns HTML rendering of a job or stage description. It will try to parse the string as HTML
+ * and make sure that it only contains anchors with root-relative links. Otherwise,
+ * the whole string will rendered as a simple escaped text.
+ *
+ * Note: In terms of security, only anchor tags with root relative links are supported. So any
+ * attempts to embed links outside Spark UI, or other tags like <script> will cause in the whole
+ * description to be treated as plain text.
+ */
+ def makeDescription(desc: String, basePathUri: String): NodeSeq = {
+ import scala.language.postfixOps
+
+ // If the description can be parsed as HTML and has only relative links, then render
+ // as HTML, otherwise render as escaped string
+ try {
+ // Try to load the description as unescaped HTML
+ val xml = XML.loadString(s"""<span class="description-input">$desc</span>""")
+
+ // Verify that this has only anchors and span (we are wrapping in span)
+ val allowedNodeLabels = Set("a", "span")
+ val illegalNodes = xml \\ "_" filterNot { case node: Node =>
+ allowedNodeLabels.contains(node.label)
+ }
+ if (illegalNodes.nonEmpty) {
+ throw new IllegalArgumentException(
+ "Only HTML anchors allowed in job descriptions\n" +
+ illegalNodes.map { n => s"${n.label} in $n"}.mkString("\n\t"))
+ }
+
+ // Verify that all links are relative links starting with "/"
+ val allLinks =
+ xml \\ "a" flatMap { _.attributes } filter { _.key == "href" } map { _.value.toString }
+ if (allLinks.exists { ! _.startsWith ("/") }) {
+ throw new IllegalArgumentException(
+ "Links in job descriptions must be root-relative:\n" + allLinks.mkString("\n\t"))
+ }
+
+ // Prepend the relative links with basePathUri
+ val rule = new RewriteRule() {
+ override def transform(n: Node): Seq[Node] = {
+ n match {
+ case e: Elem if e \ "@href" nonEmpty =>
+ val relativePath = e.attribute("href").get.toString
+ val fullUri = s"${basePathUri.stripSuffix("/")}/${relativePath.stripPrefix("/")}"
+ e % Attribute(null, "href", fullUri, Null)
+ case _ => n
+ }
+ }
+ }
+ new RuleTransformer(rule).transform(xml)
+ } catch {
+ case NonFatal(e) =>
+ logWarning(s"Invalid job description: $desc ", e)
+ <span class="description-input">{desc}</span>
+ }
+ }
}
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
index e72547df72..041cd55ea4 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
@@ -17,15 +17,15 @@
package org.apache.spark.ui.jobs
-import scala.collection.mutable.{HashMap, ListBuffer}
-import scala.xml.{Node, NodeSeq, Unparsed, Utility}
-
import java.util.Date
import javax.servlet.http.HttpServletRequest
-import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage}
-import org.apache.spark.ui.jobs.UIData.{ExecutorUIData, JobUIData}
+import scala.collection.mutable.{HashMap, ListBuffer}
+import scala.xml._
+
import org.apache.spark.JobExecutionStatus
+import org.apache.spark.ui.jobs.UIData.{ExecutorUIData, JobUIData}
+import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage}
/** Page showing list of all ongoing and recently finished jobs */
private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
@@ -224,6 +224,8 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
}
val formattedDuration = duration.map(d => UIUtils.formatDuration(d)).getOrElse("Unknown")
val formattedSubmissionTime = job.submissionTime.map(UIUtils.formatDate).getOrElse("Unknown")
+ val jobDescription = UIUtils.makeDescription(lastStageDescription, parent.basePath)
+
val detailUrl =
"%s/jobs/job?id=%s".format(UIUtils.prependBaseUri(parent.basePath), job.jobId)
<tr id={"job-" + job.jobId}>
@@ -231,7 +233,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
{job.jobId} {job.jobGroup.map(id => s"($id)").getOrElse("")}
</td>
<td>
- <span class="description-input" title={lastStageDescription}>{lastStageDescription}</span>
+ {jobDescription}
<a href={detailUrl} class="name-link">{lastStageName}</a>
</td>
<td sorttable_customkey={job.submissionTime.getOrElse(-1).toString}>
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
index 99812db491..ea806d09b6 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala
@@ -17,11 +17,10 @@
package org.apache.spark.ui.jobs
-import scala.xml.Node
-import scala.xml.Text
-
import java.util.Date
+import scala.xml.{Node, Text}
+
import org.apache.commons.lang3.StringEscapeUtils
import org.apache.spark.scheduler.StageInfo
@@ -116,7 +115,7 @@ private[ui] class StageTableBase(
stageData <- listener.stageIdToData.get((s.stageId, s.attemptId))
desc <- stageData.description
} yield {
- <span class="description-input" title={desc}>{desc}</span>
+ UIUtils.makeDescription(desc, basePathUri)
}
<div>{stageDesc.getOrElse("")} {killLink} {nameLink} {details}</div>
}
diff --git a/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala b/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala
new file mode 100644
index 0000000000..2b693c1651
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ui
+
+import scala.xml.Elem
+
+import org.apache.spark.SparkFunSuite
+
+class UIUtilsSuite extends SparkFunSuite {
+ import UIUtils._
+
+ test("makeDescription") {
+ verify(
+ """test <a href="/link"> text </a>""",
+ <span class="description-input">test <a href="/link"> text </a></span>,
+ "Correctly formatted text with only anchors and relative links should generate HTML"
+ )
+
+ verify(
+ """test <a href="/link" text </a>""",
+ <span class="description-input">{"""test <a href="/link" text </a>"""}</span>,
+ "Badly formatted text should make the description be treated as a streaming instead of HTML"
+ )
+
+ verify(
+ """test <a href="link"> text </a>""",
+ <span class="description-input">{"""test <a href="link"> text </a>"""}</span>,
+ "Non-relative links should make the description be treated as a string instead of HTML"
+ )
+
+ verify(
+ """test<a><img></img></a>""",
+ <span class="description-input">{"""test<a><img></img></a>"""}</span>,
+ "Non-anchor elements should make the description be treated as a string instead of HTML"
+ )
+
+ verify(
+ """test <a href="/link"> text </a>""",
+ <span class="description-input">test <a href="base/link"> text </a></span>,
+ baseUrl = "base",
+ errorMsg = "Base URL should be prepended to html links"
+ )
+ }
+
+ private def verify(
+ desc: String, expected: Elem, errorMsg: String = "", baseUrl: String = ""): Unit = {
+ val generated = makeDescription(desc, baseUrl)
+ assert(generated.sameElements(expected),
+ s"\n$errorMsg\n\nExpected:\n$expected\nGenerated:\n$generated")
+ }
+}