aboutsummaryrefslogtreecommitdiff
path: root/common/network-shuffle/src
diff options
context:
space:
mode:
author: Michael Allman <michael@videoamp.com>, 2016-08-25 11:57:38 -0700
committer: Shixiong Zhu <shixiong@databricks.com>, 2016-08-25 11:57:38 -0700
commit: f2093107196b9af62908ecf15bac043f3b1e64c4 (patch)
tree: 8e92e92cf3ae5376c2dfb19853d386fefef1b03c /common/network-shuffle/src
parent: d2ae6399ee2f0524b88262735adbbcb2035de8fd (diff)
download: spark-f2093107196b9af62908ecf15bac043f3b1e64c4.tar.gz
spark-f2093107196b9af62908ecf15bac043f3b1e64c4.tar.bz2
spark-f2093107196b9af62908ecf15bac043f3b1e64c4.zip
[SPARK-17231][CORE] Avoid building debug or trace log messages unless the respective log level is enabled
(This PR addresses https://issues.apache.org/jira/browse/SPARK-17231)

## What changes were proposed in this pull request?

While debugging the performance of a large GraphX connected components computation, we found several places in the `network-common` and `network-shuffle` code bases where trace or debug log messages are constructed even if the respective log level is disabled. According to YourKit, these constructions were creating substantial churn in the eden region. Refactoring the respective code so that these messages are constructed only when the respective log level is enabled led to a modest but measurable reduction in our job's task time, GC time and the ratio thereof.

## How was this patch tested?

We computed the connected components of a graph with about 2.6 billion vertices and 1.7 billion edges four times. We used four different EC2 clusters each with 8 r3.8xl worker nodes. Two test runs used Spark master. Two used Spark master + this PR.

The results from the first test run, master and master+PR:

![master](https://cloud.githubusercontent.com/assets/833693/17951634/7471cbca-6a18-11e6-9c26-78afe9319685.jpg)
![logging_perf_improvements](https://cloud.githubusercontent.com/assets/833693/17951632/7467844e-6a18-11e6-9a0e-053dc7650413.jpg)

The results from the second test run, master and master+PR:

![master 2](https://cloud.githubusercontent.com/assets/833693/17951633/746dd6aa-6a18-11e6-8e27-606680b3f105.jpg)
![logging_perf_improvements 2](https://cloud.githubusercontent.com/assets/833693/17951631/74488710-6a18-11e6-8a32-08692f373386.jpg)

Though modest, I believe these results are significant.

Author: Michael Allman <michael@videoamp.com>

Closes #14798 from mallman/spark-17231-logging_perf_improvements.
Diffstat (limited to 'common/network-shuffle/src')
-rw-r--r-- common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java | 14
-rw-r--r-- common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java | 2
2 files changed, 9 insertions, 7 deletions
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java
index 1270cef621..d05d0ac4d2 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java
@@ -42,7 +42,7 @@ import org.apache.spark.network.server.RpcHandler;
import org.apache.spark.network.server.StreamManager;
import org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.AppExecId;
import org.apache.spark.network.shuffle.protocol.*;
-import org.apache.spark.network.util.NettyUtils;
+import static org.apache.spark.network.util.NettyUtils.getRemoteAddress;
import org.apache.spark.network.util.TransportConf;
@@ -101,11 +101,13 @@ public class ExternalShuffleBlockHandler extends RpcHandler {
blocks.add(block);
}
long streamId = streamManager.registerStream(client.getClientId(), blocks.iterator());
- logger.trace("Registered streamId {} with {} buffers for client {} from host {}",
- streamId,
- msg.blockIds.length,
- client.getClientId(),
- NettyUtils.getRemoteAddress(client.getChannel()));
+ if (logger.isTraceEnabled()) {
+ logger.trace("Registered streamId {} with {} buffers for client {} from host {}",
+ streamId,
+ msg.blockIds.length,
+ client.getClientId(),
+ getRemoteAddress(client.getChannel()));
+ }
callback.onSuccess(new StreamHandle(streamId, msg.blockIds.length).toByteBuffer());
metrics.blockTransferRateBytes.mark(totalBlockSize);
} finally {
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
index 56cf1e2e3e..d436711692 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
@@ -267,7 +267,7 @@ public class ExternalShuffleBlockResolver {
for (String localDir : dirs) {
try {
JavaUtils.deleteRecursively(new File(localDir));
- logger.debug("Successfully cleaned up directory: " + localDir);
+ logger.debug("Successfully cleaned up directory: {}", localDir);
} catch (Exception e) {
logger.error("Failed to delete directory: " + localDir, e);
}