diff options
author | Alex Bozarth <ajbozart@us.ibm.com> | 2016-08-30 16:33:54 -0500 |
---|---|---|
committer | Tom Graves <tgraves@yahoo-inc.com> | 2016-08-30 16:33:54 -0500 |
commit | f7beae6da02e6b70a34c714e93136becbde7679b (patch) | |
tree | 18c0c1d197da796e636a5479e6347cf5d3480357 /core/src/main/scala | |
parent | 02ac379e8645ce5d32e033f6683136da16fbe584 (diff) | |
download | spark-f7beae6da02e6b70a34c714e93136becbde7679b.tar.gz spark-f7beae6da02e6b70a34c714e93136becbde7679b.tar.bz2 spark-f7beae6da02e6b70a34c714e93136becbde7679b.zip |
[SPARK-17243][WEB UI] Spark 2.0 History Server won't load with very large application history
## What changes were proposed in this pull request?
With the new History Server the summary page loads the application list via the the REST API, this makes it very slow to impossible to load with large (10K+) application history. This pr fixes this by adding the `spark.history.ui.maxApplications` conf to limit the number of applications the History Server displays. This is accomplished using a new optional `limit` param for the `applications` api. (Note this only applies to what the summary page displays, all the Application UI's are still accessible if the user knows the App ID and goes to the Application UI directly.)
I've also added a new test for the `limit` param in `HistoryServerSuite.scala`
## How was this patch tested?
Manual testing and dev/run-tests
Author: Alex Bozarth <ajbozart@us.ibm.com>
Closes #14835 from ajbozarth/spark17243.
Diffstat (limited to 'core/src/main/scala')
4 files changed, 18 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 74f78021ed..b4f5a6114f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -45,7 +45,8 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") <script src={UIUtils.prependBaseUri("/static/dataTables.rowsGroup.js")}></script> ++ <div id="history-summary" class="span12 pagination"></div> ++ <script src={UIUtils.prependBaseUri("/static/utils.js")}></script> ++ - <script src={UIUtils.prependBaseUri("/static/historypage.js")}></script> + <script src={UIUtils.prependBaseUri("/static/historypage.js")}></script> ++ + <script>setAppLimit({parent.maxApplications})</script> } else if (requestedIncomplete) { <h4>No incomplete applications found!</h4> } else { diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index d821474bdb..c178917d8d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -28,6 +28,7 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, ApplicationsListResource, UIRoot} import org.apache.spark.ui.{SparkUI, UIUtils, WebUI} import org.apache.spark.ui.JettyUtils._ @@ -55,6 +56,9 @@ class HistoryServer( // How many applications to retain private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) + // How many applications the summary ui displays + private[history] val maxApplications = conf.get(HISTORY_UI_MAX_APPS); + // application private val appCache = new ApplicationCache(this, retainedApplications, new SystemClock()) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index ebce07c1e3..02d7d182a4 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -120,6 +120,10 @@ package object config { .intConf .createWithDefault(100000) + // To limit how many applications are shown in the History Server summary ui + private[spark] val HISTORY_UI_MAX_APPS = + ConfigBuilder("spark.history.ui.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE) + private[spark] val IO_ENCRYPTION_ENABLED = ConfigBuilder("spark.io.encryption.enabled") .booleanConf .createWithDefault(false) diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala index 02fd2985fa..075b9ba37d 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala @@ -29,7 +29,8 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) { def appList( @QueryParam("status") status: JList[ApplicationStatus], @DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam, - @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam) + @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam, + @QueryParam("limit") limit: Integer) : Iterator[ApplicationInfo] = { val allApps = uiRoot.getApplicationInfoList val adjStatus = { @@ -41,7 +42,7 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) { } val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED) val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING) - allApps.filter { app => + val appList = allApps.filter { app => val anyRunning = app.attempts.exists(!_.completed) // if any attempt is still running, we consider the app to also still be running val statusOk = (!anyRunning && includeCompleted) || @@ -53,6 +54,11 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) { } statusOk && dateOk } + if (limit != null) { + appList.take(limit) + } else { + appList + } } } |