diff options
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala | 7 | ||||
-rw-r--r-- | core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala | 36 |
2 files changed, 40 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index d494ff0659..c5740e4737 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -294,7 +294,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .filter { entry => try { val prevFileSize = fileToAppInfo.get(entry.getPath()).map{_.fileSize}.getOrElse(0L) - !entry.isDirectory() && prevFileSize < entry.getLen() + !entry.isDirectory() && + // FsHistoryProvider generates a hidden file which can't be read. Accidentally + // reading a garbage file is safe, but we would log an error which can be scary to + // the end-user. + !entry.getPath().getName().startsWith(".") && + prevFileSize < entry.getLen() } catch { case e: AccessControlException => // Do not use "logInfo" since these messages can get pretty noisy if printed on diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 39c5857b13..01bef0a11c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -17,8 +17,7 @@ package org.apache.spark.deploy.history -import java.io.{BufferedOutputStream, ByteArrayInputStream, ByteArrayOutputStream, File, - FileOutputStream, OutputStreamWriter} +import java.io._ import java.net.URI import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit @@ -394,6 +393,39 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc } } + test("ignore hidden files") { + + // FsHistoryProvider should ignore hidden files. (It even writes out a hidden file itself + // that should be ignored). + + // write out one totally bogus hidden file + val hiddenGarbageFile = new File(testDir, ".garbage") + val out = new PrintWriter(hiddenGarbageFile) + // scalastyle:off println + out.println("GARBAGE") + // scalastyle:on println + out.close() + + // also write out one real event log file, but since its a hidden file, we shouldn't read it + val tmpNewAppFile = newLogFile("hidden", None, inProgress = false) + val hiddenNewAppFile = new File(tmpNewAppFile.getParentFile, "." + tmpNewAppFile.getName) + tmpNewAppFile.renameTo(hiddenNewAppFile) + + // and write one real file, which should still get picked up just fine + val newAppComplete = newLogFile("real-app", None, inProgress = false) + writeFile(newAppComplete, true, None, + SparkListenerApplicationStart(newAppComplete.getName(), Some("new-app-complete"), 1L, "test", + None), + SparkListenerApplicationEnd(5L) + ) + + val provider = new FsHistoryProvider(createTestConf()) + updateAndCheck(provider) { list => + list.size should be (1) + list(0).name should be ("real-app") + } + } + /** * Asks the provider to check for logs and calls a function to perform checks on the updated * app list. Example: |