diff options
author | Aaron Davidson <aaron@databricks.com> | 2014-11-06 19:54:32 -0800 |
---|---|---|
committer | Andrew Or <andrew@databricks.com> | 2014-11-06 19:54:32 -0800 |
commit | 48a19a6dba896f7d0b637f84e114b7efbb814e51 (patch) | |
tree | 70fb3c87345730f36e3b9c2f0d94c606b6d8279f /core/src | |
parent | f165b2bbf5d4acf34d826fa55b900f5bbc295654 (diff) | |
download | spark-48a19a6dba896f7d0b637f84e114b7efbb814e51.tar.gz spark-48a19a6dba896f7d0b637f84e114b7efbb814e51.tar.bz2 spark-48a19a6dba896f7d0b637f84e114b7efbb814e51.zip |
[SPARK-4236] Cleanup removed applications' files in shuffle service
This relies on a hook from whoever is hosting the shuffle service to invoke removeApplication() when the application is completed. Once invoked, we will clean up all the executors' shuffle directories we know about.
Author: Aaron Davidson <aaron@databricks.com>
Closes #3126 from aarondav/cleanup and squashes the following commits:
33a64a9 [Aaron Davidson] Missing brace
e6e428f [Aaron Davidson] Address comments
16a0d27 [Aaron Davidson] Cleanup
e4df3e7 [Aaron Davidson] [SPARK-4236] Cleanup removed applications' files in shuffle service
Diffstat (limited to 'core/src')
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Utils.scala | 1 | ||||
-rw-r--r-- | core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala | 5 |
2 files changed, 4 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 7caf6bcf94..2cbd38d72c 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -755,6 +755,7 @@ private[spark] object Utils extends Logging { /** * Delete a file or directory and its contents recursively. * Don't follow directories if they are symlinks. + * Throws an exception if deletion is unsuccessful. */ def deleteRecursively(file: File) { if (file != null) { diff --git a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala index 792b9cd8b6..6608ed1e57 100644 --- a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala @@ -63,8 +63,9 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll { rdd.count() rdd.count() - // Invalidate the registered executors, disallowing access to their shuffle blocks. - rpcHandler.clearRegisteredExecutors() + // Invalidate the registered executors, disallowing access to their shuffle blocks (without + // deleting the actual shuffle files, so we could access them without the shuffle service). + rpcHandler.applicationRemoved(sc.conf.getAppId, false /* cleanupLocalDirs */) // Now Spark will receive FetchFailed, and not retry the stage due to "spark.test.noStageRetry" // being set. |