From 8fd178d2151da53c0edc7ed3a92ebd01780d7702 Mon Sep 17 00:00:00 2001
From: zuotingbing
Date: Tue, 7 Feb 2017 12:21:36 +0000
Subject: [SPARK-19260] Spaces or "%20" in path parameter are not correctly handled with…
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

JIRA Issue: https://issues.apache.org/jira/browse/SPARK-19260

## What changes were proposed in this pull request?

1. `spark.history.fs.logDirectory` now works with paths that contain space characters or `%20` sequences.
2. As usual, if hdfs-site.xml and core-site.xml are on the runtime classpath, a supplied path without a scheme (e.g. "/test") is treated as an HDFS path rather than a local path, since the parameter is a Hadoop directory.

A short illustrative sketch of this resolution behaviour follows the diff below.

## How was this patch tested?

Updated the unit test and ran some manual tests.

local:
./sbin/start-history-server.sh "file:/a b"
./sbin/start-history-server.sh "/abc%20c" (without hdfs-site.xml, core-site.xml)
./sbin/start-history-server.sh "/a b" (without hdfs-site.xml, core-site.xml)
./sbin/start-history-server.sh "/a b/a bc%20c" (without hdfs-site.xml, core-site.xml)

hdfs:
./sbin/start-history-server.sh "hdfs://namenode:9000/a b"
./sbin/start-history-server.sh "/a b" (with hdfs-site.xml, core-site.xml)
./sbin/start-history-server.sh "/a b/a bc%20c" (with hdfs-site.xml, core-site.xml)

Author: zuotingbing

Closes #16614 from zuotingbing/SPARK-19260.
---
 .../main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'core/src/main')

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index cd241d6d22..9012736bc2 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -94,7 +94,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt)
 
   private val logDir = conf.getOption("spark.history.fs.logDirectory")
-    .map { d => Utils.resolveURI(d).toString }
     .getOrElse(DEFAULT_LOG_DIR)
 
   private val HISTORY_UI_ACLS_ENABLE = conf.getBoolean("spark.history.ui.acls.enable", false)
@@ -105,7 +104,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     "; groups with admin permissions" + HISTORY_UI_ADMIN_ACLS_GROUPS.toString)
 
   private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
-  private val fs = Utils.getHadoopFileSystem(logDir, hadoopConf)
+  private val fs = new Path(logDir).getFileSystem(hadoopConf)
 
   // Used by check event thread and clean log thread.
   // Scheduled thread pool size must be one, otherwise it will have concurrent issues about fs
--
cgit v1.2.3
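
For readers less familiar with Hadoop path handling, here is a minimal Scala sketch (separate from the patch above, referenced from the PR description) of the behaviour the fix relies on: a Hadoop `Path` keeps literal spaces intact, and `Path.getFileSystem` resolves a scheme-less directory against `fs.defaultFS` from the Hadoop configuration. The object name and the example directory are made up, `java.io.File.toURI` is used only as a rough stand-in for the URI-encoding step that the removed `Utils.resolveURI` call performed, and only hadoop-common is assumed on the classpath.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object LogDirResolutionSketch {
  def main(args: Array[String]): Unit = {
    val hadoopConf = new Configuration()

    // A log directory containing a space and no scheme, as a user might pass it
    // to spark.history.fs.logDirectory.
    val logDir = "/history/a b"

    // With the patch, the string goes straight into a Hadoop Path: the space is
    // preserved, and the filesystem is whatever fs.defaultFS says -- the local fs
    // for a bare Configuration like this one.
    val path = new Path(logDir)
    val fs: FileSystem = path.getFileSystem(hadoopConf)
    println(s"filesystem from fs.defaultFS: ${fs.getUri}")        // file:/// here
    println(s"path as Hadoop sees it:       ${fs.makeQualified(path)}")

    // Before the patch, the directory string was first converted to a URI, so a
    // scheme-less path came out percent-encoded, roughly like the output below
    // (e.g. "file:/history/a%20b"), which no longer named the intended directory.
    val encoded = new java.io.File(logDir).toURI.toString
    println(s"URI-encoded form of the same directory: $encoded")
  }
}
```

With hdfs-site.xml and core-site.xml on the classpath pointing `fs.defaultFS` at a namenode, the same `getFileSystem` call would return the HDFS filesystem instead, which is the second point in the PR description.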