author     Yanbo Liang <ybliang8@gmail.com>    2016-09-21 20:08:28 -0700
committer  Yanbo Liang <ybliang8@gmail.com>    2016-09-21 20:08:28 -0700
commit     c133907c5d9a6e6411b896b5e0cff48b2beff09f (patch)
tree       f19d91c861860737b06b0fae0118ce43094cbebe /core/src
parent     7cbe2164499e83b6c009fdbab0fbfffe89a2ecc0 (diff)
[SPARK-17577][SPARKR][CORE] SparkR support add files to Spark job and get by executors
## What changes were proposed in this pull request?

Scala/Python users can add files to a Spark job via the submit option ```--files``` or via ```SparkContext.addFile()```, and can then retrieve an added file with ```SparkFiles.get(filename)```. We should support the same functionality for SparkR users, since they have the same need for shared dependency files. For example, SparkR users can first download third-party R packages to the driver, add those files to the Spark job as dependencies through this API, and then have each executor install the packages via ```install.packages```.

## How was this patch tested?

Added a unit test.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #15131 from yanboliang/spark-17577.
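For context, a minimal Scala sketch of the driver-side API the commit message refers to; the application name and file path below are illustrative, not taken from the patch:

```scala
import org.apache.spark.{SparkConf, SparkContext, SparkFiles}

object AddFileExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("addFile-example").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Register a dependency file with the job; executors fetch it on demand.
    sc.addFile("/tmp/deps/mylib.tar.gz") // hypothetical path

    // Resolve the locally fetched copy by its base name, on the driver or in a task.
    val localPath = SparkFiles.get("mylib.tar.gz")
    println(s"Fetched to: $localPath")

    sc.stop()
  }
}
```

Note that ```SparkFiles.get``` resolves the file by its base name, which is why the sketch passes only ```mylib.tar.gz``` rather than the full path.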
Diffstat (limited to 'core/src')
-rw-r--r--  core/src/main/scala/org/apache/spark/SparkContext.scala  6
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index db84172e16..1981ad5671 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1427,7 +1427,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
* supported for Hadoop-supported filesystems.
*/
def addFile(path: String, recursive: Boolean): Unit = {
- val uri = new URI(path)
+ val uri = new Path(path).toUri
val schemeCorrectedPath = uri.getScheme match {
case null | "local" => new File(path).getCanonicalFile.toURI.toString
case _ => path
@@ -1458,8 +1458,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
logInfo(s"Added file $path at $key with timestamp $timestamp")
// Fetch the file locally so that closures which are run on the driver can still use the
// SparkFiles API to access files.
- Utils.fetchFile(path, new File(SparkFiles.getRootDirectory()), conf, env.securityManager,
- hadoopConfiguration, timestamp, useCache = false)
+ Utils.fetchFile(uri.toString, new File(SparkFiles.getRootDirectory()), conf,
+ env.securityManager, hadoopConfiguration, timestamp, useCache = false)
postEnvironmentUpdate()
}
}
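To illustrate the behavioral difference behind the first hunk (my reading of the change, not stated in the commit message), a small sketch comparing ```new URI(path)``` with ```new Path(path).toUri``` on a hypothetical local path containing a space:

```scala
import java.net.{URI, URISyntaxException}
import org.apache.hadoop.fs.Path

object UriVsPath {
  def main(args: Array[String]): Unit = {
    val raw = "/tmp/my deps/mylib.tar.gz" // hypothetical path with a space

    // java.net.URI expects an already-escaped string and rejects the raw space.
    try {
      new URI(raw)
    } catch {
      case e: URISyntaxException => println(s"URI rejects it: ${e.getMessage}")
    }

    // org.apache.hadoop.fs.Path accepts the unescaped string and escapes it itself.
    val uri = new Path(raw).toUri
    println(s"Path accepts it: $uri") // prints /tmp/my%20deps/mylib.tar.gz
  }
}
```

The second hunk appears to follow from the first: once the path has been normalized into ```uri```, passing ```uri.toString``` to ```Utils.fetchFile``` keeps the driver-side fetch consistent with the key under which the file was registered.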