diff options
author | Colin McCabe <cmccabe@cloudera.com> | 2014-06-04 15:56:29 -0700 |
---|---|---|
committer | Patrick Wendell <pwendell@gmail.com> | 2014-06-04 15:56:29 -0700 |
commit | 1765c8d0ddf6bb5bc3c21f994456eba04c581de4 (patch) | |
tree | 604b2391d61dce732a8bfc5c6f1e3dd7c52102b0 /core | |
parent | 189df165bb7cb8bc8ede48d0e7f8d8b5cd31d299 (diff) | |
download | spark-1765c8d0ddf6bb5bc3c21f994456eba04c581de4.tar.gz spark-1765c8d0ddf6bb5bc3c21f994456eba04c581de4.tar.bz2 spark-1765c8d0ddf6bb5bc3c21f994456eba04c581de4.zip |
SPARK-1518: FileLogger: Fix compile against Hadoop trunk
In Hadoop trunk (currently Hadoop 3.0.0), the deprecated
FSDataOutputStream#sync() method has been removed. Instead, we should
call FSDataOutputStream#hflush, which does the same thing as the
deprecated method used to do.
Author: Colin McCabe <cmccabe@cloudera.com>
Closes #898 from cmccabe/SPARK-1518 and squashes the following commits:
752b9d7 [Colin McCabe] FileLogger: Fix compile against Hadoop trunk
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/FileLogger.scala | 16 |
1 file changed, 12 insertions, 4 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/FileLogger.scala b/core/src/main/scala/org/apache/spark/util/FileLogger.scala index 0e6d21b220..6a95dc06e1 100644 --- a/core/src/main/scala/org/apache/spark/util/FileLogger.scala +++ b/core/src/main/scala/org/apache/spark/util/FileLogger.scala @@ -61,6 +61,14 @@ private[spark] class FileLogger( // Only defined if the file system scheme is not local private var hadoopDataStream: Option[FSDataOutputStream] = None + // The Hadoop APIs have changed over time, so we use reflection to figure out + // the correct method to use to flush a hadoop data stream. See SPARK-1518 + // for details. + private val hadoopFlushMethod = { + val cls = classOf[FSDataOutputStream] + scala.util.Try(cls.getMethod("hflush")).getOrElse(cls.getMethod("sync")) + } + private var writer: Option[PrintWriter] = None /** @@ -149,13 +157,13 @@ private[spark] class FileLogger( /** * Flush the writer to disk manually. * - * If the Hadoop FileSystem is used, the underlying FSDataOutputStream (r1.0.4) must be - * sync()'ed manually as it does not support flush(), which is invoked by when higher - * level streams are flushed. + * When using a Hadoop filesystem, we need to invoke the hflush or sync + * method. In HDFS, hflush guarantees that the data gets to all the + * DataNodes. */ def flush() { writer.foreach(_.flush()) - hadoopDataStream.foreach(_.sync()) + hadoopDataStream.foreach(hadoopFlushMethod.invoke(_)) } /** |