aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Bozarth <ajbozart@us.ibm.com>2016-08-30 16:33:54 -0500
committerTom Graves <tgraves@yahoo-inc.com>2016-08-30 16:33:54 -0500
commitf7beae6da02e6b70a34c714e93136becbde7679b (patch)
tree18c0c1d197da796e636a5479e6347cf5d3480357
parent02ac379e8645ce5d32e033f6683136da16fbe584 (diff)
downloadspark-f7beae6da02e6b70a34c714e93136becbde7679b.tar.gz
spark-f7beae6da02e6b70a34c714e93136becbde7679b.tar.bz2
spark-f7beae6da02e6b70a34c714e93136becbde7679b.zip
[SPARK-17243][WEB UI] Spark 2.0 History Server won't load with very large application history
## What changes were proposed in this pull request? With the new History Server the summary page loads the application list via the REST API, this makes it very slow to impossible to load with large (10K+) application history. This pr fixes this by adding the `spark.history.ui.maxApplications` conf to limit the number of applications the History Server displays. This is accomplished using a new optional `limit` param for the `applications` api. (Note this only applies to what the summary page displays, all the Application UI's are still accessible if the user knows the App ID and goes to the Application UI directly.) I've also added a new test for the `limit` param in `HistoryServerSuite.scala` ## How was this patch tested? Manual testing and dev/run-tests Author: Alex Bozarth <ajbozart@us.ibm.com> Closes #14835 from ajbozarth/spark17243.
-rw-r--r--core/src/main/resources/org/apache/spark/ui/static/historypage.js8
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala3
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala4
-rw-r--r--core/src/main/scala/org/apache/spark/internal/config/package.scala4
-rw-r--r--core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala10
-rw-r--r--core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json67
-rw-r--r--core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala1
-rw-r--r--docs/monitoring.md16
8 files changed, 106 insertions, 7 deletions
diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js
index 5b9afb59ef..c8094005c6 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js
@@ -15,6 +15,12 @@
* limitations under the License.
*/
+var appLimit = -1;
+
+function setAppLimit(val) {
+ appLimit = val;
+}
+
function makeIdNumeric(id) {
var strs = id.split("_");
if (strs.length < 3) {
@@ -89,7 +95,7 @@ $(document).ready(function() {
requestedIncomplete = getParameterByName("showIncomplete", searchString);
requestedIncomplete = (requestedIncomplete == "true" ? true : false);
- $.getJSON("api/v1/applications", function(response,status,jqXHR) {
+ $.getJSON("api/v1/applications?limit=" + appLimit, function(response,status,jqXHR) {
var array = [];
var hasMultipleAttempts = false;
for (i in response) {
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
index 74f78021ed..b4f5a6114f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
@@ -45,7 +45,8 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
<script src={UIUtils.prependBaseUri("/static/dataTables.rowsGroup.js")}></script> ++
<div id="history-summary" class="span12 pagination"></div> ++
<script src={UIUtils.prependBaseUri("/static/utils.js")}></script> ++
- <script src={UIUtils.prependBaseUri("/static/historypage.js")}></script>
+ <script src={UIUtils.prependBaseUri("/static/historypage.js")}></script> ++
+ <script>setAppLimit({parent.maxApplications})</script>
} else if (requestedIncomplete) {
<h4>No incomplete applications found!</h4>
} else {
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index d821474bdb..c178917d8d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -28,6 +28,7 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, ApplicationsListResource, UIRoot}
import org.apache.spark.ui.{SparkUI, UIUtils, WebUI}
import org.apache.spark.ui.JettyUtils._
@@ -55,6 +56,9 @@ class HistoryServer(
// How many applications to retain
private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50)
+ // How many applications the summary ui displays
+ private[history] val maxApplications = conf.get(HISTORY_UI_MAX_APPS);
+
// application
private val appCache = new ApplicationCache(this, retainedApplications, new SystemClock())
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index ebce07c1e3..02d7d182a4 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -120,6 +120,10 @@ package object config {
.intConf
.createWithDefault(100000)
+ // To limit how many applications are shown in the History Server summary ui
+ private[spark] val HISTORY_UI_MAX_APPS =
+ ConfigBuilder("spark.history.ui.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE)
+
private[spark] val IO_ENCRYPTION_ENABLED = ConfigBuilder("spark.io.encryption.enabled")
.booleanConf
.createWithDefault(false)
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala
index 02fd2985fa..075b9ba37d 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala
@@ -29,7 +29,8 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) {
def appList(
@QueryParam("status") status: JList[ApplicationStatus],
@DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam,
- @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam)
+ @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam,
+ @QueryParam("limit") limit: Integer)
: Iterator[ApplicationInfo] = {
val allApps = uiRoot.getApplicationInfoList
val adjStatus = {
@@ -41,7 +42,7 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) {
}
val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED)
val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING)
- allApps.filter { app =>
+ val appList = allApps.filter { app =>
val anyRunning = app.attempts.exists(!_.completed)
// if any attempt is still running, we consider the app to also still be running
val statusOk = (!anyRunning && includeCompleted) ||
@@ -53,6 +54,11 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) {
}
statusOk && dateOk
}
+ if (limit != null) {
+ appList.take(limit)
+ } else {
+ appList
+ }
}
}
diff --git a/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json
new file mode 100644
index 0000000000..9165f549d7
--- /dev/null
+++ b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json
@@ -0,0 +1,67 @@
+[ {
+ "id" : "local-1430917381534",
+ "name" : "Spark shell",
+ "attempts" : [ {
+ "startTime" : "2015-05-06T13:03:00.893GMT",
+ "endTime" : "2015-05-06T13:03:11.398GMT",
+ "lastUpdated" : "",
+ "duration" : 10505,
+ "sparkUser" : "irashid",
+ "completed" : true,
+ "startTimeEpoch" : 1430917380893,
+ "endTimeEpoch" : 1430917391398,
+ "lastUpdatedEpoch" : 0
+ } ]
+}, {
+ "id" : "local-1430917381535",
+ "name" : "Spark shell",
+ "attempts" : [ {
+ "attemptId" : "2",
+ "startTime" : "2015-05-06T13:03:00.893GMT",
+ "endTime" : "2015-05-06T13:03:00.950GMT",
+ "lastUpdated" : "",
+ "duration" : 57,
+ "sparkUser" : "irashid",
+ "completed" : true,
+ "startTimeEpoch" : 1430917380893,
+ "endTimeEpoch" : 1430917380950,
+ "lastUpdatedEpoch" : 0
+ }, {
+ "attemptId" : "1",
+ "startTime" : "2015-05-06T13:03:00.880GMT",
+ "endTime" : "2015-05-06T13:03:00.890GMT",
+ "lastUpdated" : "",
+ "duration" : 10,
+ "sparkUser" : "irashid",
+ "completed" : true,
+ "startTimeEpoch" : 1430917380880,
+ "endTimeEpoch" : 1430917380890,
+ "lastUpdatedEpoch" : 0
+ } ]
+}, {
+ "id" : "local-1426533911241",
+ "name" : "Spark shell",
+ "attempts" : [ {
+ "attemptId" : "2",
+ "startTime" : "2015-03-17T23:11:50.242GMT",
+ "endTime" : "2015-03-17T23:12:25.177GMT",
+ "lastUpdated" : "",
+ "duration" : 34935,
+ "sparkUser" : "irashid",
+ "completed" : true,
+ "startTimeEpoch" : 1426633910242,
+ "endTimeEpoch" : 1426633945177,
+ "lastUpdatedEpoch" : 0
+ }, {
+ "attemptId" : "1",
+ "startTime" : "2015-03-16T19:25:10.242GMT",
+ "endTime" : "2015-03-16T19:25:45.177GMT",
+ "lastUpdated" : "",
+ "duration" : 34935,
+ "sparkUser" : "irashid",
+ "completed" : true,
+ "startTimeEpoch" : 1426533910242,
+ "endTimeEpoch" : 1426533945177,
+ "lastUpdatedEpoch" : 0
+ } ]
+} ]
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
index 631a7cd9d5..ae3f5d9c01 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
@@ -100,6 +100,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
"minDate app list json" -> "applications?minDate=2015-02-10",
"maxDate app list json" -> "applications?maxDate=2015-02-10",
"maxDate2 app list json" -> "applications?maxDate=2015-02-03T16:42:40.000GMT",
+ "limit app list json" -> "applications?limit=3",
"one app json" -> "applications/local-1422981780767",
"one app multi-attempt json" -> "applications/local-1426533911241",
"job list json" -> "applications/local-1422981780767/jobs",
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 6fdf87b4be..5804e4f26c 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -114,8 +114,17 @@ The history server can be configured as follows:
<td>spark.history.retainedApplications</td>
<td>50</td>
<td>
- The number of application UIs to retain. If this cap is exceeded, then the oldest
- applications will be removed.
+ The number of applications to retain UI data for in the cache. If this cap is exceeded, then
+ the oldest applications will be removed from the cache. If an application is not in the cache,
+ it will have to be loaded from disk if its accessed from the UI.
+ </td>
+ </tr>
+ <tr>
+ <td>spark.history.ui.maxApplications</td>
+ <td>Int.MaxValue</td>
+ <td>
+ The number of applications to display on the history summary page. Application UIs are still
+ available by accessing their URLs directly even if they are not displayed on the history summary page.
</td>
</tr>
<tr>
@@ -242,7 +251,8 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
<br>Examples:
<br><code>?minDate=2015-02-10</code>
<br><code>?minDate=2015-02-03T16:42:40.000GMT</code>
- <br><code>?maxDate=[date]</code> latest date/time to list; uses same format as <code>minDate</code>.</td>
+ <br><code>?maxDate=[date]</code> latest date/time to list; uses same format as <code>minDate</code>.
+ <br><code>?limit=[limit]</code> limits the number of applications listed.</td>
</tr>
<tr>
<td><code>/applications/[app-id]/jobs</code></td>