aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Owen <sowen@cloudera.com>2014-10-05 18:44:12 -0700
committerMichael Armbrust <michael@databricks.com>2014-10-05 18:44:12 -0700
commit8d22dbb5ec7a0727afdfebbbc2c57ffdb384dd0b (patch)
treef624cb51aa42eeb93259c9e186b30574a39229d2
parent90897ea5f24b03c9f3455a62c7f68b3d3f0435ad (diff)
downloadspark-8d22dbb5ec7a0727afdfebbbc2c57ffdb384dd0b.tar.gz
spark-8d22dbb5ec7a0727afdfebbbc2c57ffdb384dd0b.tar.bz2
spark-8d22dbb5ec7a0727afdfebbbc2c57ffdb384dd0b.zip
SPARK-3794 [CORE] Building spark core fails due to inadvertent dependency on Commons IO
Remove references to Commons IO FileUtils and replace with pure Java version, which doesn't need to traverse the whole directory tree first. I think this method could be refined further if it would be alright to rename it and its args and break it down into two methods. I'm starting with a simple recursive rendition. Author: Sean Owen <sowen@cloudera.com> Closes #2662 from srowen/SPARK-3794 and squashes the following commits: 4cd172f [Sean Owen] Remove references to Commons IO FileUtils and replace with pure Java version, which doesn't need to traverse the whole directory tree first
-rwxr-xr-xcore/src/main/scala/org/apache/spark/deploy/worker/Worker.scala1
-rw-r--r--core/src/main/scala/org/apache/spark/util/Utils.scala20
2 files changed, 10 insertions, 11 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index 3b13f43a18..9b52cb06fb 100755
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -29,7 +29,6 @@ import scala.language.postfixOps
import akka.actor._
import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}
-import org.apache.commons.io.FileUtils
import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException}
import org.apache.spark.deploy.{ExecutorDescription, ExecutorState}
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index a67124140f..3d307b3c16 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -35,8 +35,6 @@ import scala.util.control.{ControlThrowable, NonFatal}
import com.google.common.io.Files
import com.google.common.util.concurrent.ThreadFactoryBuilder
-import org.apache.commons.io.FileUtils
-import org.apache.commons.io.filefilter.TrueFileFilter
import org.apache.commons.lang3.SystemUtils
import org.apache.hadoop.conf.Configuration
import org.apache.log4j.PropertyConfigurator
@@ -710,18 +708,20 @@ private[spark] object Utils extends Logging {
* Determines if a directory contains any files newer than cutoff seconds.
*
* @param dir must be the path to a directory, or IllegalArgumentException is thrown
- * @param cutoff measured in seconds. Returns true if there are any files in dir newer than this.
+ * @param cutoff measured in seconds. Returns true if there are any files or directories in the
+ * given directory whose last modified time is later than this many seconds ago
*/
def doesDirectoryContainAnyNewFiles(dir: File, cutoff: Long): Boolean = {
- val currentTimeMillis = System.currentTimeMillis
if (!dir.isDirectory) {
- throw new IllegalArgumentException (dir + " is not a directory!")
- } else {
- val files = FileUtils.listFilesAndDirs(dir, TrueFileFilter.TRUE, TrueFileFilter.TRUE)
- val cutoffTimeInMillis = (currentTimeMillis - (cutoff * 1000))
- val newFiles = files.filter { _.lastModified > cutoffTimeInMillis }
- newFiles.nonEmpty
+ throw new IllegalArgumentException(s"$dir is not a directory!")
}
+ val filesAndDirs = dir.listFiles()
+ val cutoffTimeInMillis = System.currentTimeMillis - (cutoff * 1000)
+
+ filesAndDirs.exists(_.lastModified() > cutoffTimeInMillis) ||
+ filesAndDirs.filter(_.isDirectory).exists(
+ subdir => doesDirectoryContainAnyNewFiles(subdir, cutoff)
+ )
}
/**