author | Yanbo Liang <ybliang8@gmail.com> | 2016-09-21 20:08:28 -0700
committer | Yanbo Liang <ybliang8@gmail.com> | 2016-09-21 20:08:28 -0700
commit | c133907c5d9a6e6411b896b5e0cff48b2beff09f
tree | f19d91c861860737b06b0fae0118ce43094cbebe /core/src/main/scala/org/apache
parent | 7cbe2164499e83b6c009fdbab0fbfffe89a2ecc0
[SPARK-17577][SPARKR][CORE] SparkR support for adding files to a Spark job and retrieving them on executors
## What changes were proposed in this pull request?
Scala and Python users can add files to a Spark job via the submit option ```--files``` or ```SparkContext.addFile()```, and retrieve an added file with ```SparkFiles.get(filename)```.
We should support the same functionality for SparkR users, since they have the same need for shared dependency files. For example, a SparkR user can first download third-party R packages to the driver, add those files to the Spark job as dependencies through this API, and then have each executor install the packages with ```install.packages```.
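For context, here is a minimal sketch of the same flow through the existing Scala API that this PR mirrors for R users (the file path, app name, and master below are illustrative):

```scala
import org.apache.spark.{SparkConf, SparkContext, SparkFiles}

object AddFileExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("addFile-demo").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Distribute a local file to every node participating in the job.
    sc.addFile("/tmp/deps/mypackage_0.1.tar.gz")

    // On the driver or inside a task, resolve the local copy by file name.
    val localPath = SparkFiles.get("mypackage_0.1.tar.gz")
    println(s"Dependency available at $localPath")

    sc.stop()
  }
}
```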
## How was this patch tested?
Added a unit test.
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #15131 from yanboliang/spark-17577.
Diffstat (limited to 'core/src/main/scala/org/apache')
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkContext.scala | 6
1 file changed, 3 insertions(+), 3 deletions(-)
```diff
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index db84172e16..1981ad5671 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1427,7 +1427,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
    * supported for Hadoop-supported filesystems.
    */
   def addFile(path: String, recursive: Boolean): Unit = {
-    val uri = new URI(path)
+    val uri = new Path(path).toUri
     val schemeCorrectedPath = uri.getScheme match {
       case null | "local" => new File(path).getCanonicalFile.toURI.toString
       case _ => path
@@ -1458,8 +1458,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
     logInfo(s"Added file $path at $key with timestamp $timestamp")
     // Fetch the file locally so that closures which are run on the driver can still use the
     // SparkFiles API to access files.
-    Utils.fetchFile(path, new File(SparkFiles.getRootDirectory()), conf, env.securityManager,
-      hadoopConfiguration, timestamp, useCache = false)
+    Utils.fetchFile(uri.toString, new File(SparkFiles.getRootDirectory()), conf,
+      env.securityManager, hadoopConfiguration, timestamp, useCache = false)
     postEnvironmentUpdate()
   }
```
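The core of the change is swapping ```new URI(path)``` for ```new Path(path).toUri```: ```java.net.URI``` rejects strings that are not well-formed URIs (for example, local paths containing spaces), whereas Hadoop's ```Path``` normalizes such input into a usable URI. A minimal sketch of the difference, assuming hadoop-common on the classpath and a hypothetical path:

```scala
import java.net.{URI, URISyntaxException}

import org.apache.hadoop.fs.Path

object UriVsPath {
  def main(args: Array[String]): Unit = {
    val raw = "/tmp/my data/file.txt" // hypothetical local path containing a space

    // java.net.URI rejects unescaped characters such as the space above.
    try {
      new URI(raw)
    } catch {
      case e: URISyntaxException => println(s"URI parse failed: ${e.getMessage}")
    }

    // Hadoop's Path accepts the same string and yields a well-formed URI.
    val uri = new Path(raw).toUri
    println(s"Path-derived URI: $uri (scheme = ${uri.getScheme})")
  }
}
```

The second hunk follows from the first: ```Utils.fetchFile``` now receives ```uri.toString```, the normalized form, rather than the raw ```path```, so the driver-side fetch uses the same URI that was parsed above.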