aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
author: Andrew Or <andrewor14@gmail.com> 2014-06-27 16:11:31 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-06-27 16:11:31 -0700
commit: f17510e371dfbeaada3c72b884d70c36503ea30a (patch)
tree: 2a134954b34cdb3a1bf9b3e8dd7d251e9ccef28f /core
parent: 21e0f77b6321590ed86223a60cdb8ae08ea4057f (diff)
download: spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.gz
spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.bz2
spark-f17510e371dfbeaada3c72b884d70c36503ea30a.zip
[SPARK-2259] Fix highly misleading docs on cluster / client deploy modes
The existing docs are highly misleading. For standalone mode, for example, they encourage the user to use standalone-cluster mode, which is not officially supported. Safeguards have been added in Spark submit itself to prevent bad documentation from leading users down the wrong path in the future. This PR is prompted by countless headaches users of Spark have run into on the mailing list.

Author: Andrew Or <andrewor14@gmail.com>

Closes #1200 from andrewor14/submit-docs and squashes the following commits:

5ea2460 [Andrew Or] Rephrase cluster vs client explanation
c827f32 [Andrew Or] Clarify spark submit messages
9f7ed8f [Andrew Or] Clarify client vs cluster deploy mode + add safeguards
Diffstat (limited to 'core')
-rw-r--r-- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 17
-rw-r--r-- core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 5
2 files changed, 17 insertions, 5 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 7e9a9344e6..b050dccb6d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -19,7 +19,7 @@ package org.apache.spark.deploy
import java.io.{File, PrintStream}
import java.lang.reflect.InvocationTargetException
-import java.net.{URI, URL}
+import java.net.URL
import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
@@ -117,14 +117,25 @@ object SparkSubmit {
val isPython = args.isPython
val isYarnCluster = clusterManager == YARN && deployOnCluster
+ // For mesos, only client mode is supported
if (clusterManager == MESOS && deployOnCluster) {
- printErrorAndExit("Cannot currently run driver on the cluster in Mesos")
+ printErrorAndExit("Cluster deploy mode is currently not supported for Mesos clusters.")
+ }
+
+ // For standalone, only client mode is supported
+ if (clusterManager == STANDALONE && deployOnCluster) {
+ printErrorAndExit("Cluster deploy mode is currently not supported for standalone clusters.")
+ }
+
+ // For shells, only client mode is applicable
+ if (isShell(args.primaryResource) && deployOnCluster) {
+ printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.")
}
// If we're running a python app, set the main class to our specific python runner
if (isPython) {
if (deployOnCluster) {
- printErrorAndExit("Cannot currently run Python driver programs on cluster")
+ printErrorAndExit("Cluster deploy mode is currently not supported for python.")
}
if (args.primaryResource == PYSPARK_SHELL) {
args.mainClass = "py4j.GatewayServer"
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index f1032ea8db..57655aa4c3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -338,8 +338,9 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
"""Usage: spark-submit [options] <app jar | python file> [app options]
|Options:
| --master MASTER_URL spark://host:port, mesos://host:port, yarn, or local.
- | --deploy-mode DEPLOY_MODE Where to run the driver program: either "client" to run
- | on the local machine, or "cluster" to run inside cluster.
+ | --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or
+ | on one of the worker machines inside the cluster ("cluster")
+ | (Default: client).
| --class CLASS_NAME Your application's main class (for Java / Scala apps).
| --name NAME A name of your application.
| --jars JARS Comma-separated list of local jars to include on the driver