diff options
author | Andrew Or <andrewor14@gmail.com> | 2014-06-27 16:11:31 -0700 |
---|---|---|
committer | Patrick Wendell <pwendell@gmail.com> | 2014-06-27 16:11:31 -0700 |
commit | f17510e371dfbeaada3c72b884d70c36503ea30a (patch) | |
tree | 2a134954b34cdb3a1bf9b3e8dd7d251e9ccef28f /core/src/main/scala | |
parent | 21e0f77b6321590ed86223a60cdb8ae08ea4057f (diff) | |
download | spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.gz spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.bz2 spark-f17510e371dfbeaada3c72b884d70c36503ea30a.zip |
[SPARK-2259] Fix highly misleading docs on cluster / client deploy modes
The existing docs are highly misleading. For standalone mode, for example, they encourage the user to use standalone-cluster mode, which is not officially supported. Safeguards have been added in Spark submit itself to prevent bad documentation from leading users down the wrong path in the future.
This PR is prompted by countless headaches users of Spark have run into on the mailing list.
Author: Andrew Or <andrewor14@gmail.com>
Closes #1200 from andrewor14/submit-docs and squashes the following commits:
5ea2460 [Andrew Or] Rephrase cluster vs client explanation
c827f32 [Andrew Or] Clarify spark submit messages
9f7ed8f [Andrew Or] Clarify client vs cluster deploy mode + add safeguards
Diffstat (limited to 'core/src/main/scala')
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 17 | ||||
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 5 |
2 files changed, 17 insertions, 5 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 7e9a9344e6..b050dccb6d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy import java.io.{File, PrintStream} import java.lang.reflect.InvocationTargetException -import java.net.{URI, URL} +import java.net.URL import scala.collection.mutable.{ArrayBuffer, HashMap, Map} @@ -117,14 +117,25 @@ object SparkSubmit { val isPython = args.isPython val isYarnCluster = clusterManager == YARN && deployOnCluster + // For mesos, only client mode is supported if (clusterManager == MESOS && deployOnCluster) { - printErrorAndExit("Cannot currently run driver on the cluster in Mesos") + printErrorAndExit("Cluster deploy mode is currently not supported for Mesos clusters.") + } + + // For standalone, only client mode is supported + if (clusterManager == STANDALONE && deployOnCluster) { + printErrorAndExit("Cluster deploy mode is currently not supported for standalone clusters.") + } + + // For shells, only client mode is applicable + if (isShell(args.primaryResource) && deployOnCluster) { + printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.") } // If we're running a python app, set the main class to our specific python runner if (isPython) { if (deployOnCluster) { - printErrorAndExit("Cannot currently run Python driver programs on cluster") + printErrorAndExit("Cluster deploy mode is currently not supported for python.") } if (args.primaryResource == PYSPARK_SHELL) { args.mainClass = "py4j.GatewayServer" diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index f1032ea8db..57655aa4c3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -338,8 +338,9 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) { """Usage: spark-submit [options] <app jar | python file> [app options] |Options: | --master MASTER_URL spark://host:port, mesos://host:port, yarn, or local. - | --deploy-mode DEPLOY_MODE Where to run the driver program: either "client" to run - | on the local machine, or "cluster" to run inside cluster. + | --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or + | on one of the worker machines inside the cluster ("cluster") + | (Default: client). | --class CLASS_NAME Your application's main class (for Java / Scala apps). | --name NAME A name of your application. | --jars JARS Comma-separated list of local jars to include on the driver |