aboutsummaryrefslogtreecommitdiff
path: root/common
diff options
context:
space:
mode:
authorTejas Patil <tejasp@fb.com>2016-05-18 12:10:32 +0100
committerSean Owen <sowen@cloudera.com>2016-05-18 12:10:32 +0100
commitc1fd9cacba8e602cc55a893f0beb05ea48c3a1f6 (patch)
treed455f0cb8841bf0560ae56d56ce05037b8a780da /common
parent420b700695fe8bcdda406c34ad48230b9dfc07f1 (diff)
downloadspark-c1fd9cacba8e602cc55a893f0beb05ea48c3a1f6.tar.gz
spark-c1fd9cacba8e602cc55a893f0beb05ea48c3a1f6.tar.bz2
spark-c1fd9cacba8e602cc55a893f0beb05ea48c3a1f6.zip
[SPARK-15263][CORE] Make shuffle service dir cleanup faster by using `rm -rf`
## What changes were proposed in this pull request? Jira: https://issues.apache.org/jira/browse/SPARK-15263 The current logic for directory cleanup is slow because it does directory listing, recurses over child directories, checks for symbolic links, deletes leaf files and finally deletes the dirs when they are empty. There is back-and-forth switching from kernel space to user space while doing this. Since most of the deployment backends would be Unix systems, we could essentially just do `rm -rf` so that entire deletion logic runs in kernel space. The current Java based impl in Spark seems to be similar to what standard libraries like guava and commons IO do (eg. http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/FileUtils.java?view=markup#l1540). However, guava removed this method in favour of shelling out to an operating system command (like in this PR). See the `Deprecated` note in older javadocs for guava for details : http://google.github.io/guava/releases/10.0.1/api/docs/com/google/common/io/Files.html#deleteRecursively(java.io.File) Ideally, Java should be providing such APIs so that users won't have to do such things to get platform specific code. Also, it's not just about speed, but also handling race conditions while doing FS deletions is tricky. I could find this bug for Java in similar context : http://bugs.java.com/bugdatabase/view_bug.do?bug_id=7148952 ## How was this patch tested? I am relying on existing test cases to test the method. If there are suggestions about testing it, I welcome hearing about them. ## Performance gains *Input setup* : Created a nested directory structure of depth 3 and each entry having 50 sub-dirs. The input being cleaned up had total ~125k dirs. Ran both approaches (in isolation) for 6 times to get average numbers: Native Java cleanup | `rm -rf` as a separate process ------------ | ------------- 10.04 sec | 4.11 sec This change made deletion 2.4 times faster for the given test input. 
Author: Tejas Patil <tejasp@fb.com> Closes #13042 from tejasapatil/delete_recursive.
Diffstat (limited to 'common')
-rw-r--r--common/network-common/pom.xml4
-rw-r--r--common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java49
-rw-r--r--common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java24
3 files changed, 61 insertions, 16 deletions
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 5444ae6d70..12d89273d7 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -41,6 +41,10 @@
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ </dependency>
<!-- Provided dependencies -->
<dependency>
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
index fbed2f053d..eb534eed24 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import io.netty.buffer.Unpooled;
+import org.apache.commons.lang3.SystemUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -79,14 +80,32 @@ public class JavaUtils {
return Unpooled.wrappedBuffer(b).toString(StandardCharsets.UTF_8);
}
- /*
+ /**
* Delete a file or directory and its contents recursively.
* Don't follow directories if they are symlinks.
- * Throws an exception if deletion is unsuccessful.
+ *
+ * @param file Input file / dir to be deleted
+ * @throws IOException if deletion is unsuccessful
*/
public static void deleteRecursively(File file) throws IOException {
if (file == null) { return; }
+ // On Unix systems, use operating system command to run faster
+ // If that does not work out, fallback to the Java IO way
+ if (SystemUtils.IS_OS_UNIX) {
+ try {
+ deleteRecursivelyUsingUnixNative(file);
+ return;
+ } catch (IOException e) {
+ logger.warn("Attempt to delete using native Unix OS command failed for path = {}. " +
+ "Falling back to Java IO way", file.getAbsolutePath(), e);
+ }
+ }
+
+ deleteRecursivelyUsingJavaIO(file);
+ }
+
+ private static void deleteRecursivelyUsingJavaIO(File file) throws IOException {
if (file.isDirectory() && !isSymlink(file)) {
IOException savedIOException = null;
for (File child : listFilesSafely(file)) {
@@ -109,6 +128,32 @@ public class JavaUtils {
}
}
+ private static void deleteRecursivelyUsingUnixNative(File file) throws IOException {
+ ProcessBuilder builder = new ProcessBuilder("rm", "-rf", file.getAbsolutePath());
+ Process process = null;
+ int exitCode = -1;
+
+ try {
+ // In order to avoid deadlocks, consume the stdout (and stderr) of the process
+ builder.redirectErrorStream(true);
+ builder.redirectOutput(new File("/dev/null"));
+
+ process = builder.start();
+
+ exitCode = process.waitFor();
+ } catch (Exception e) {
+ throw new IOException("Failed to delete: " + file.getAbsolutePath(), e);
+ } finally {
+ if (process != null && process.isAlive()) {
+ process.destroy();
+ }
+ }
+
+ if (exitCode != 0 || file.exists()) {
+ throw new IOException("Failed to delete: " + file.getAbsolutePath());
+ }
+ }
+
private static File[] listFilesSafely(File file) throws IOException {
if (file.exists()) {
File[] files = file.listFiles();
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
index 62a1fb42b0..81e01949e5 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
@@ -27,12 +27,17 @@ import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
+import org.apache.spark.network.util.JavaUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Manages some sort-shuffle data, including the creation
* and cleanup of directories that can be read by the {@link ExternalShuffleBlockResolver}.
*/
public class TestShuffleDataContext {
+ private static final Logger logger = LoggerFactory.getLogger(TestShuffleDataContext.class);
+
public final String[] localDirs;
public final int subDirsPerLocalDir;
@@ -53,7 +58,11 @@ public class TestShuffleDataContext {
public void cleanup() {
for (String localDir : localDirs) {
- deleteRecursively(new File(localDir));
+ try {
+ JavaUtils.deleteRecursively(new File(localDir));
+ } catch (IOException e) {
+ logger.warn("Unable to cleanup localDir = " + localDir, e);
+ }
}
}
@@ -92,17 +101,4 @@ public class TestShuffleDataContext {
public ExecutorShuffleInfo createExecutorInfo(String shuffleManager) {
return new ExecutorShuffleInfo(localDirs, subDirsPerLocalDir, shuffleManager);
}
-
- private static void deleteRecursively(File f) {
- assert f != null;
- if (f.isDirectory()) {
- File[] children = f.listFiles();
- if (children != null) {
- for (File child : children) {
- deleteRecursively(child);
- }
- }
- }
- f.delete();
- }
}