aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorTom Graves <tgraves@yahoo-inc.com>2015-10-09 14:06:25 -0700
committerMarcelo Vanzin <vanzin@cloudera.com>2015-10-09 14:06:25 -0700
commit63c340a710b24869410d56602b712fbfe443e6f0 (patch)
treec5ba32f8f446dfdaab4ba325f2da688b90ec4e64 /core
parent12b7191d2075ae870c73529de450cbb5725872ec (diff)
downloadspark-63c340a710b24869410d56602b712fbfe443e6f0.tar.gz
spark-63c340a710b24869410d56602b712fbfe443e6f0.tar.bz2
spark-63c340a710b24869410d56602b712fbfe443e6f0.zip
[SPARK-10858] YARN: archives/jar/files rename with # doesn't work unless scheme given
https://issues.apache.org/jira/browse/SPARK-10858 The issue here is that in resolveURI we default to calling new File(path).getAbsoluteFile().toURI(). But if the path passed in already has a # in it, then File(path) will treat the # as part of the actual file path rather than as a fragment, so it escapes # to %23. When we later parse that in Client as a URI, the fragment is no longer recognized. To fix this, we check whether the original URI has a fragment, still create the File as before, and then add the fragment back onto the resulting URI. Author: Tom Graves <tgraves@yahoo-inc.com> Closes #9035 from tgravescs/SPARK-10858.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/util/Utils.scala7
-rw-r--r--core/src/test/scala/org/apache/spark/util/UtilsSuite.scala6
2 files changed, 10 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 2bab4af2e7..e60c1b355a 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -1749,6 +1749,13 @@ private[spark] object Utils extends Logging {
if (uri.getScheme() != null) {
return uri
}
+ // make sure to handle if the path has a fragment (applies to yarn
+ // distributed cache)
+ if (uri.getFragment() != null) {
+ val absoluteURI = new File(uri.getPath()).getAbsoluteFile().toURI()
+ return new URI(absoluteURI.getScheme(), absoluteURI.getHost(), absoluteURI.getPath(),
+ uri.getFragment())
+ }
} catch {
case e: URISyntaxException =>
}
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 1fb81ad565..68b0da76bc 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -384,7 +384,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar")
assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar")
assertResolves("spark.jar", s"file:$cwd/spark.jar")
- assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar")
+ assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar#app.jar")
assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt")
if (Utils.isWindows) {
assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt")
@@ -414,10 +414,10 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2")
assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3")
assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6",
- s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6")
+ s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5,file:$cwd/path%20to/jar6")
if (Utils.isWindows) {
assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""",
- s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4")
+ s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi,file:/C:/path%20to/jar4")
}
}