diff options
author | Tathagata Das <tathagata.das1565@gmail.com> | 2015-09-22 22:44:09 -0700 |
---|---|---|
committer | Tathagata Das <tathagata.das1565@gmail.com> | 2015-09-22 22:44:09 -0700 |
commit | 5548a254755bb84edae2768b94ab1816e1b49b91 (patch) | |
tree | 4f126c853f9b93a3b24fcc617a1c7c422cd7dc90 /core | |
parent | 558e9c7e60a7c0d85ba26634e97562ad2163e91d (diff) | |
download | spark-5548a254755bb84edae2768b94ab1816e1b49b91.tar.gz spark-5548a254755bb84edae2768b94ab1816e1b49b91.tar.bz2 spark-5548a254755bb84edae2768b94ab1816e1b49b91.zip |
[SPARK-10652] [SPARK-10742] [STREAMING] Set meaningful job descriptions for all streaming jobs
Here are screenshots taken after adding job descriptions to the threads that run receivers and to the scheduler thread that runs the batch jobs.
## All jobs page
* Added job descriptions with links to relevant batch details page
![image](https://cloud.githubusercontent.com/assets/663212/9924165/cda4a372-5cb1-11e5-91ca-d43a32c699e9.png)
## All stages page
* Added stage descriptions with links to relevant batch details page
![image](https://cloud.githubusercontent.com/assets/663212/9923814/2cce266a-5cae-11e5-8a3f-dad84d06c50e.png)
## Streaming batch details page
* Added the +details link
![image](https://cloud.githubusercontent.com/assets/663212/9921977/24014a32-5c98-11e5-958e-457b6c38065b.png)
Author: Tathagata Das <tathagata.das1565@gmail.com>
Closes #8791 from tdas/SPARK-10652.
Diffstat (limited to 'core')
4 files changed, 137 insertions, 12 deletions
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index f2da417724..21dc8f0b65 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -18,9 +18,11 @@ package org.apache.spark.ui import java.text.SimpleDateFormat -import java.util.{Locale, Date} +import java.util.{Date, Locale} -import scala.xml.{Node, Text, Unparsed} +import scala.util.control.NonFatal +import scala.xml._ +import scala.xml.transform.{RewriteRule, RuleTransformer} import org.apache.spark.Logging import org.apache.spark.ui.scope.RDDOperationGraph @@ -395,4 +397,60 @@ private[spark] object UIUtils extends Logging { </script> } + /** + * Returns HTML rendering of a job or stage description. It will try to parse the string as HTML + * and make sure that it only contains anchors with root-relative links. Otherwise, + * the whole string will be rendered as a simple escaped text. + * + * Note: In terms of security, only anchor tags with root relative links are supported. So any + * attempts to embed links outside Spark UI, or other tags like <script> will cause the whole + * description to be treated as plain text. 
+ */ + def makeDescription(desc: String, basePathUri: String): NodeSeq = { + import scala.language.postfixOps + + // If the description can be parsed as HTML and has only relative links, then render + // as HTML, otherwise render as escaped string + try { + // Try to load the description as unescaped HTML + val xml = XML.loadString(s"""<span class="description-input">$desc</span>""") + + // Verify that this has only anchors and span (we are wrapping in span) + val allowedNodeLabels = Set("a", "span") + val illegalNodes = xml \\ "_" filterNot { case node: Node => + allowedNodeLabels.contains(node.label) + } + if (illegalNodes.nonEmpty) { + throw new IllegalArgumentException( + "Only HTML anchors allowed in job descriptions\n" + + illegalNodes.map { n => s"${n.label} in $n"}.mkString("\n\t")) + } + + // Verify that all links are relative links starting with "/" + val allLinks = + xml \\ "a" flatMap { _.attributes } filter { _.key == "href" } map { _.value.toString } + if (allLinks.exists { ! 
_.startsWith ("/") }) { + throw new IllegalArgumentException( + "Links in job descriptions must be root-relative:\n" + allLinks.mkString("\n\t")) + } + + // Prepend the relative links with basePathUri + val rule = new RewriteRule() { + override def transform(n: Node): Seq[Node] = { + n match { + case e: Elem if e \ "@href" nonEmpty => + val relativePath = e.attribute("href").get.toString + val fullUri = s"${basePathUri.stripSuffix("/")}/${relativePath.stripPrefix("/")}" + e % Attribute(null, "href", fullUri, Null) + case _ => n + } + } + } + new RuleTransformer(rule).transform(xml) + } catch { + case NonFatal(e) => + logWarning(s"Invalid job description: $desc ", e) + <span class="description-input">{desc}</span> + } + } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index e72547df72..041cd55ea4 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -17,15 +17,15 @@ package org.apache.spark.ui.jobs -import scala.collection.mutable.{HashMap, ListBuffer} -import scala.xml.{Node, NodeSeq, Unparsed, Utility} - import java.util.Date import javax.servlet.http.HttpServletRequest -import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} -import org.apache.spark.ui.jobs.UIData.{ExecutorUIData, JobUIData} +import scala.collection.mutable.{HashMap, ListBuffer} +import scala.xml._ + import org.apache.spark.JobExecutionStatus +import org.apache.spark.ui.jobs.UIData.{ExecutorUIData, JobUIData} +import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} /** Page showing list of all ongoing and recently finished jobs */ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { @@ -224,6 +224,8 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { } val formattedDuration = duration.map(d => UIUtils.formatDuration(d)).getOrElse("Unknown") val 
formattedSubmissionTime = job.submissionTime.map(UIUtils.formatDate).getOrElse("Unknown") + val jobDescription = UIUtils.makeDescription(lastStageDescription, parent.basePath) + val detailUrl = "%s/jobs/job?id=%s".format(UIUtils.prependBaseUri(parent.basePath), job.jobId) <tr id={"job-" + job.jobId}> @@ -231,7 +233,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { {job.jobId} {job.jobGroup.map(id => s"($id)").getOrElse("")} </td> <td> - <span class="description-input" title={lastStageDescription}>{lastStageDescription}</span> + {jobDescription} <a href={detailUrl} class="name-link">{lastStageName}</a> </td> <td sorttable_customkey={job.submissionTime.getOrElse(-1).toString}> diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala index 99812db491..ea806d09b6 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -17,11 +17,10 @@ package org.apache.spark.ui.jobs -import scala.xml.Node -import scala.xml.Text - import java.util.Date +import scala.xml.{Node, Text} + import org.apache.commons.lang3.StringEscapeUtils import org.apache.spark.scheduler.StageInfo @@ -116,7 +115,7 @@ private[ui] class StageTableBase( stageData <- listener.stageIdToData.get((s.stageId, s.attemptId)) desc <- stageData.description } yield { - <span class="description-input" title={desc}>{desc}</span> + UIUtils.makeDescription(desc, basePathUri) } <div>{stageDesc.getOrElse("")} {killLink} {nameLink} {details}</div> } diff --git a/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala b/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala new file mode 100644 index 0000000000..2b693c1651 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui + +import scala.xml.Elem + +import org.apache.spark.SparkFunSuite + +class UIUtilsSuite extends SparkFunSuite { + import UIUtils._ + + test("makeDescription") { + verify( + """test <a href="/link"> text </a>""", + <span class="description-input">test <a href="/link"> text </a></span>, + "Correctly formatted text with only anchors and relative links should generate HTML" + ) + + verify( + """test <a href="/link" text </a>""", + <span class="description-input">{"""test <a href="/link" text </a>"""}</span>, + "Badly formatted text should make the description be treated as a streaming instead of HTML" + ) + + verify( + """test <a href="link"> text </a>""", + <span class="description-input">{"""test <a href="link"> text </a>"""}</span>, + "Non-relative links should make the description be treated as a string instead of HTML" + ) + + verify( + """test<a><img></img></a>""", + <span class="description-input">{"""test<a><img></img></a>"""}</span>, + "Non-anchor elements should make the description be treated as a string instead of HTML" + ) + + verify( + """test <a href="/link"> text </a>""", + <span class="description-input">test <a href="base/link"> text </a></span>, + baseUrl = "base", + errorMsg = "Base URL should be prepended to html links" + ) + } + 
+ private def verify( + desc: String, expected: Elem, errorMsg: String = "", baseUrl: String = ""): Unit = { + val generated = makeDescription(desc, baseUrl) + assert(generated.sameElements(expected), + s"\n$errorMsg\n\nExpected:\n$expected\nGenerated:\n$generated") + } +} |