aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Graves <tgraves@apache.org>2014-01-08 08:49:20 -0600
committerThomas Graves <tgraves@apache.org>2014-01-08 08:49:20 -0600
commit6eef78d769379b98a1fa6d4f2fa606216e5063a6 (patch)
tree47ca5ed7b3a4a5f1732a0647d564ec5218107fa2
parentbb6a39a6872fb8e7118c5a402b7b9d67459e9a64 (diff)
parent67af8031368c4a3558aeb46c290b7b7a09516539 (diff)
downloadspark-6eef78d769379b98a1fa6d4f2fa606216e5063a6.tar.gz
spark-6eef78d769379b98a1fa6d4f2fa606216e5063a6.tar.bz2
spark-6eef78d769379b98a1fa6d4f2fa606216e5063a6.zip
Merge pull request #345 from colorant/yarn
Support distributing extra files to workers in yarn-client mode, so that users don't need to package all dependencies into one assembly jar as the Spark app jar.
-rw-r--r--docs/running-on-yarn.md2
-rw-r--r--yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala2
-rw-r--r--yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala4
-rw-r--r--yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala2
4 files changed, 7 insertions, 3 deletions
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 717071d72c..b206270107 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -114,6 +114,8 @@ For example:
SPARK_YARN_APP_JAR=examples/target/scala-{{site.SCALA_VERSION}}/spark-examples-assembly-{{site.SPARK_VERSION}}.jar \
MASTER=yarn-client ./bin/spark-shell
+You can also send extra files to the YARN cluster for the workers to use by exporting SPARK_YARN_DIST_FILES=file1,file2,... (a comma-separated list of files).
+
# Building Spark for Hadoop/YARN 2.2.x
See [Building Spark with Maven](building-with-maven.html) for instructions on how to build Spark using the Maven process.
diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 6abb4d5017..23781ea35c 100644
--- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -102,7 +102,7 @@ class Client(args: ClientArguments, conf: Configuration, sparkConf: SparkConf)
(System.getenv("SPARK_JAR") == null) -> "Error: You must set SPARK_JAR environment variable!",
(args.userJar == null) -> "Error: You must specify a user jar!",
(args.userClass == null) -> "Error: You must specify a user class!",
- (args.numWorkers <= 0) -> "Error: You must specify atleast 1 worker!",
+ (args.numWorkers <= 0) -> "Error: You must specify at least 1 worker!",
(args.amMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: AM memory size must be " +
"greater than: " + YarnAllocationHandler.MEMORY_OVERHEAD),
(args.workerMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: Worker memory size " +
diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
index 324ef4616f..4b1b5da048 100644
--- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
@@ -39,6 +39,7 @@ private[spark] class YarnClientSchedulerBackend(
val defaultWorkerNumber = "1"
val userJar = System.getenv("SPARK_YARN_APP_JAR")
+ val distFiles = System.getenv("SPARK_YARN_DIST_FILES")
var workerCores = System.getenv("SPARK_WORKER_CORES")
var workerMemory = System.getenv("SPARK_WORKER_MEMORY")
var workerNumber = System.getenv("SPARK_WORKER_INSTANCES")
@@ -64,7 +65,8 @@ private[spark] class YarnClientSchedulerBackend(
"--worker-memory", workerMemory,
"--worker-cores", workerCores,
"--num-workers", workerNumber,
- "--master-class", "org.apache.spark.deploy.yarn.WorkerLauncher"
+ "--master-class", "org.apache.spark.deploy.yarn.WorkerLauncher",
+ "--files", distFiles
)
val args = new ClientArguments(argsArray, conf)
diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 440ad5cde5..be323d7783 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -122,7 +122,7 @@ class Client(args: ClientArguments, conf: Configuration, sparkConf: SparkConf)
(System.getenv("SPARK_JAR") == null) -> "Error: You must set SPARK_JAR environment variable!",
(args.userJar == null) -> "Error: You must specify a user jar!",
(args.userClass == null) -> "Error: You must specify a user class!",
- (args.numWorkers <= 0) -> "Error: You must specify atleast 1 worker!",
+ (args.numWorkers <= 0) -> "Error: You must specify at least 1 worker!",
(args.amMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: AM memory size must be" +
"greater than: " + YarnAllocationHandler.MEMORY_OVERHEAD),
(args.workerMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: Worker memory size" +