aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorImran Rashid <irashid@cloudera.com>2016-09-29 15:40:35 -0700
committerMarcelo Vanzin <vanzin@cloudera.com>2016-09-29 15:40:35 -0700
commit3993ebca23afa4b8770695051635933a6c9d2c11 (patch)
tree5e5918efa9e804721591f06f29b79b29f8e6c1d7
parent29396e7d1483d027960b9a1bed47008775c4253e (diff)
downloadspark-3993ebca23afa4b8770695051635933a6c9d2c11.tar.gz
spark-3993ebca23afa4b8770695051635933a6c9d2c11.tar.bz2
spark-3993ebca23afa4b8770695051635933a6c9d2c11.zip
[SPARK-17676][CORE] FsHistoryProvider should ignore hidden files
## What changes were proposed in this pull request? FsHistoryProvider was writing a hidden file (to check the filesystem's clock). Even though it deleted the file immediately, sometimes another thread would scan the files on the filesystem in between, and an error message would then be logged that was very misleading for the end user. (The logged error was harmless, though.) With this change, FsHistoryProvider skips files whose names start with "." when scanning for event logs. ## How was this patch tested? I added one unit test, but to be clear, that test was passing before. The only actual change in behavior in that test is the logging (after the change, the error is no longer logged), which I verified manually. Author: Imran Rashid <irashid@cloudera.com> Closes #15250 from squito/SPARK-17676.
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala7
-rw-r--r--core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala36
2 files changed, 40 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index d494ff0659..c5740e4737 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -294,7 +294,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
.filter { entry =>
try {
val prevFileSize = fileToAppInfo.get(entry.getPath()).map{_.fileSize}.getOrElse(0L)
- !entry.isDirectory() && prevFileSize < entry.getLen()
+ !entry.isDirectory() &&
+ // FsHistoryProvider generates a hidden file which can't be read. Accidentally
+ // reading a garbage file is safe, but we would log an error which can be scary to
+ // the end-user.
+ !entry.getPath().getName().startsWith(".") &&
+ prevFileSize < entry.getLen()
} catch {
case e: AccessControlException =>
// Do not use "logInfo" since these messages can get pretty noisy if printed on
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 39c5857b13..01bef0a11c 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -17,8 +17,7 @@
package org.apache.spark.deploy.history
-import java.io.{BufferedOutputStream, ByteArrayInputStream, ByteArrayOutputStream, File,
- FileOutputStream, OutputStreamWriter}
+import java.io._
import java.net.URI
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeUnit
@@ -394,6 +393,39 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
}
}
+ test("ignore hidden files") {
+
+ // FsHistoryProvider should ignore hidden files. (It even writes out a hidden file itself
+ // that should be ignored.)
+
+ // write out one totally bogus hidden file
+ val hiddenGarbageFile = new File(testDir, ".garbage")
+ val out = new PrintWriter(hiddenGarbageFile)
+ // scalastyle:off println
+ out.println("GARBAGE")
+ // scalastyle:on println
+ out.close()
+
+ // also write out one real event log file, but since it's a hidden file, we shouldn't read it
+ val tmpNewAppFile = newLogFile("hidden", None, inProgress = false)
+ val hiddenNewAppFile = new File(tmpNewAppFile.getParentFile, "." + tmpNewAppFile.getName)
+ tmpNewAppFile.renameTo(hiddenNewAppFile)
+
+ // and write one real file, which should still get picked up just fine
+ val newAppComplete = newLogFile("real-app", None, inProgress = false)
+ writeFile(newAppComplete, true, None,
+ SparkListenerApplicationStart(newAppComplete.getName(), Some("new-app-complete"), 1L, "test",
+ None),
+ SparkListenerApplicationEnd(5L)
+ )
+
+ val provider = new FsHistoryProvider(createTestConf())
+ updateAndCheck(provider) { list =>
+ list.size should be (1)
+ list(0).name should be ("real-app")
+ }
+ }
+
/**
* Asks the provider to check for logs and calls a function to perform checks on the updated
* app list. Example: