[SPARK-2259] Fix highly misleading docs on cluster / client deploy modes

The existing docs are highly misleading. For standalone mode, for example, it encourages the user to use standalone-cluster mode, which is not officially supported. The safeguards have been added in Spark submit itself to prevent bad documentation from leading users down the wrong path in the future. This PR is prompted by countless headaches users of Spark have run into on the mailing list. Author: Andrew Or <andrewor14@gmail.com> Closes #1200 from andrewor14/submit-docs and squashes the following commits: 5ea2460 [Andrew Or] Rephrase cluster vs client explanation c827f32 [Andrew Or] Clarify spark submit messages 9f7ed8f [Andrew Or] Clarify client vs cluster deploy mode + add safeguards
author: Andrew Or <andrewor14@gmail.com> 2014-06-27 16:11:31 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-06-27 16:11:31 -0700
commit: f17510e371dfbeaada3c72b884d70c36503ea30a (patch)
tree: 2a134954b34cdb3a1bf9b3e8dd7d251e9ccef28f /core
parent: 21e0f77b6321590ed86223a60cdb8ae08ea4057f (diff)
download: spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.gz
spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.bz2
spark-f17510e371dfbeaada3c72b884d70c36503ea30a.zip
2 files changed, 17 insertions, 5 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 7e9a9344e6..b050dccb6d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -19,7 +19,7 @@ package org.apache.spark.deploy
 
 import java.io.{File, PrintStream}
 import java.lang.reflect.InvocationTargetException
-import java.net.{URI, URL}
+import java.net.URL
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
 
@@ -117,14 +117,25 @@ object SparkSubmit {
     val isPython = args.isPython
     val isYarnCluster = clusterManager == YARN && deployOnCluster
 
+    // For mesos, only client mode is supported
     if (clusterManager == MESOS && deployOnCluster) {
-      printErrorAndExit("Cannot currently run driver on the cluster in Mesos")
+      printErrorAndExit("Cluster deploy mode is currently not supported for Mesos clusters.")
+    }
+
+    // For standalone, only client mode is supported
+    if (clusterManager == STANDALONE && deployOnCluster) {
+      printErrorAndExit("Cluster deploy mode is currently not supported for standalone clusters.")
+    }
+
+    // For shells, only client mode is applicable
+    if (isShell(args.primaryResource) && deployOnCluster) {
+      printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.")
     }
 
     // If we're running a python app, set the main class to our specific python runner
     if (isPython) {
       if (deployOnCluster) {
-        printErrorAndExit("Cannot currently run Python driver programs on cluster")
+        printErrorAndExit("Cluster deploy mode is currently not supported for python.")
       }
       if (args.primaryResource == PYSPARK_SHELL) {
         args.mainClass = "py4j.GatewayServer"
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index f1032ea8db..57655aa4c3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -338,8 +338,9 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
       """Usage: spark-submit [options] <app jar | python file> [app options]
         |Options:
         |  --master MASTER_URL         spark://host:port, mesos://host:port, yarn, or local.
-        |  --deploy-mode DEPLOY_MODE   Where to run the driver program: either "client" to run
-        |                              on the local machine, or "cluster" to run inside cluster.
+        |  --deploy-mode DEPLOY_MODE   Whether to launch the driver program locally ("client") or
+        |                              on one of the worker machines inside the cluster ("cluster")
+        |                              (Default: client).
         |  --class CLASS_NAME          Your application's main class (for Java / Scala apps).
         |  --name NAME                 A name of your application.
         |  --jars JARS                 Comma-separated list of local jars to include on the driver
author	Andrew Or <andrewor14@gmail.com>	2014-06-27 16:11:31 -0700
committer	Patrick Wendell <pwendell@gmail.com>	2014-06-27 16:11:31 -0700
commit	f17510e371dfbeaada3c72b884d70c36503ea30a (patch)
tree	2a134954b34cdb3a1bf9b3e8dd7d251e9ccef28f /core
parent	21e0f77b6321590ed86223a60cdb8ae08ea4057f (diff)
download	spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.gz spark-f17510e371dfbeaada3c72b884d70c36503ea30a.tar.bz2 spark-f17510e371dfbeaada3c72b884d70c36503ea30a.zip