author     Timothy Chen <tnachen@gmail.com>    2016-02-01 12:45:02 -0800
committer  Andrew Or <andrew@databricks.com>   2016-02-01 12:45:02 -0800
commit     51b03b71ffc390e67b32936efba61e614a8b0d86 (patch)
tree       e4f30c0054c46d8a52a8d7b1dd97045de5c53023
parent     711ce048a285403241bbc9eaabffc1314162e89c (diff)
[SPARK-12463][SPARK-12464][SPARK-12465][SPARK-10647][MESOS] Fix zookeeper dir with mesos conf and add docs.
Fix zookeeper dir configuration used in cluster mode, and also add documentation around these settings.

Author: Timothy Chen <tnachen@gmail.com>

Closes #10057 from tnachen/fix_mesos_dir.
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala                   |  6
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterPersistenceEngine.scala |  4
-rw-r--r--  docs/configuration.md                                                                            | 23
-rw-r--r--  docs/running-on-mesos.md                                                                         |  5
-rw-r--r--  docs/spark-standalone.md                                                                         | 23
5 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala b/core/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
index 66e1e64500..9b31497adf 100644
--- a/core/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/mesos/MesosClusterDispatcher.scala
@@ -50,7 +50,7 @@ private[mesos] class MesosClusterDispatcher(
extends Logging {
private val publicAddress = Option(conf.getenv("SPARK_PUBLIC_DNS")).getOrElse(args.host)
- private val recoveryMode = conf.get("spark.mesos.deploy.recoveryMode", "NONE").toUpperCase()
+ private val recoveryMode = conf.get("spark.deploy.recoveryMode", "NONE").toUpperCase()
logInfo("Recovery mode in Mesos dispatcher set to: " + recoveryMode)
private val engineFactory = recoveryMode match {
@@ -98,8 +98,8 @@ private[mesos] object MesosClusterDispatcher extends Logging {
conf.setMaster(dispatcherArgs.masterUrl)
conf.setAppName(dispatcherArgs.name)
dispatcherArgs.zookeeperUrl.foreach { z =>
- conf.set("spark.mesos.deploy.recoveryMode", "ZOOKEEPER")
- conf.set("spark.mesos.deploy.zookeeper.url", z)
+ conf.set("spark.deploy.recoveryMode", "ZOOKEEPER")
+ conf.set("spark.deploy.zookeeper.url", z)
}
val dispatcher = new MesosClusterDispatcher(dispatcherArgs, conf)
dispatcher.start()
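As a rough sketch of the renamed keys in use (illustrative only, not part of the patch; the property names and the recovery-mode selection come from the hunk above, the values are made up):

```scala
import org.apache.spark.SparkConf

// Illustrative values; in practice the dispatcher receives these via its
// command-line arguments or SPARK_DAEMON_JAVA_OPTS.
val conf = new SparkConf()
  .set("spark.deploy.recoveryMode", "ZOOKEEPER")          // was spark.mesos.deploy.recoveryMode
  .set("spark.deploy.zookeeper.url", "zk1:2181,zk2:2181") // was spark.mesos.deploy.zookeeper.url

// Mirrors the dispatcher's recovery-mode selection shown above.
conf.get("spark.deploy.recoveryMode", "NONE").toUpperCase() match {
  case "ZOOKEEPER" => println("ZooKeeper-backed persistence engine")
  case _           => println("in-memory (non-recoverable) persistence engine")
}
```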
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterPersistenceEngine.scala
index e0c547dce6..092d9e4182 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterPersistenceEngine.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterPersistenceEngine.scala
@@ -53,9 +53,9 @@ private[spark] trait MesosClusterPersistenceEngine {
* all of them reuses the same connection pool.
*/
private[spark] class ZookeeperMesosClusterPersistenceEngineFactory(conf: SparkConf)
- extends MesosClusterPersistenceEngineFactory(conf) {
+ extends MesosClusterPersistenceEngineFactory(conf) with Logging {
- lazy val zk = SparkCuratorUtil.newClient(conf, "spark.mesos.deploy.zookeeper.url")
+ lazy val zk = SparkCuratorUtil.newClient(conf)
def createEngine(path: String): MesosClusterPersistenceEngine = {
new ZookeeperMesosClusterPersistenceEngine(path, zk, conf)
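A hedged usage sketch of the factory after this change, assuming `SparkCuratorUtil.newClient(conf)` now resolves the ZooKeeper URL from `spark.deploy.zookeeper.url` by default (consistent with the call-site change above). These classes are `private[spark]`, so this would only compile inside Spark itself, and the `"driverState"` path segment is purely illustrative:

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.deploy.recoveryMode", "ZOOKEEPER")
  .set("spark.deploy.zookeeper.url", "zk1:2181") // read by SparkCuratorUtil.newClient(conf)

// Factory and engine types are the ones defined in this file; the path is illustrative.
val factory = new ZookeeperMesosClusterPersistenceEngineFactory(conf)
val engine  = factory.createEngine("driverState")
```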
diff --git a/docs/configuration.md b/docs/configuration.md
index 74a8fb5d35..93b399d819 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1585,6 +1585,29 @@ Apart from these, the following properties are also available, and may be useful
</tr>
</table>
+#### Deploy
+
+<table class="table">
+ <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+ <tr>
+ <td><code>spark.deploy.recoveryMode</code></td>
+ <td>NONE</td>
+ <td>The recovery mode used to recover drivers submitted in cluster mode if the cluster manager fails and is relaunched.
+ This only applies to cluster mode when running with Standalone or Mesos.</td>
+ </tr>
+ <tr>
+ <td><code>spark.deploy.zookeeper.url</code></td>
+ <td>None</td>
+ <td>When <code>spark.deploy.recoveryMode</code> is set to ZOOKEEPER, this configuration is used to set the ZooKeeper URL to connect to.</td>
+ </tr>
+ <tr>
+ <td><code>spark.deploy.zookeeper.dir</code></td>
+ <td>None</td>
+ <td>When <code>spark.deploy.recoveryMode</code> is set to ZOOKEEPER, this configuration is used to set the ZooKeeper directory to store recovery state.</td>
+ </tr>
+</table>
+
+
#### Cluster Managers
Each cluster manager in Spark has additional configuration options. Configurations
can be found on the pages for each mode:
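For the Deploy table added in this hunk, a minimal sketch (not part of the patch) of how these keys reach a daemon: `SPARK_DAEMON_JAVA_OPTS` passes them as `-Dspark.*` system properties, and a default-constructed `SparkConf` picks up any `spark.*` system property.

```scala
// e.g. SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER \
//   -Dspark.deploy.zookeeper.url=zk1:2181,zk2:2181 -Dspark.deploy.zookeeper.dir=/spark"
import org.apache.spark.SparkConf

val conf = new SparkConf()  // loadDefaults = true: reads spark.* system properties
println(conf.get("spark.deploy.recoveryMode", "NONE")) // documented default: NONE
println(conf.getOption("spark.deploy.zookeeper.url"))  // None unless configured
println(conf.getOption("spark.deploy.zookeeper.dir"))  // None unless configured
```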
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index ed720f1039..0ef1ccb36e 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -153,7 +153,10 @@ can find the results of the driver from the Mesos Web UI.
To use cluster mode, you must start the `MesosClusterDispatcher` in your cluster via the `sbin/start-mesos-dispatcher.sh` script,
passing in the Mesos master URL (e.g: mesos://host:5050). This starts the `MesosClusterDispatcher` as a daemon running on the host.
-If you like to run the `MesosClusterDispatcher` with Marathon, you need to run the `MesosClusterDispatcher` in the foreground (i.e: `bin/spark-class org.apache.spark.deploy.mesos.MesosClusterDispatcher`).
+If you would like to run the `MesosClusterDispatcher` with Marathon, you need to run the `MesosClusterDispatcher` in the foreground (i.e: `bin/spark-class org.apache.spark.deploy.mesos.MesosClusterDispatcher`). Note that the `MesosClusterDispatcher` does not yet support multiple instances for HA.
+
+The `MesosClusterDispatcher` also supports writing recovery state into ZooKeeper. This allows the `MesosClusterDispatcher` to recover all submitted and running containers on relaunch. In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring `spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations.
+For more information about these configurations please refer to the configuration [doc](configuration.html#deploy).
From the client, you can submit a job to Mesos cluster by running `spark-submit` and specifying the master URL
to the URL of the `MesosClusterDispatcher` (e.g: mesos://dispatcher:7077). You can view driver statuses on the Spark cluster Web UI.
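The paragraph above describes `spark-submit`; as a hedged alternative sketch, the same submission can be done programmatically through `SparkLauncher` (a public Spark API). The jar path, main class, and dispatcher host below are placeholders:

```scala
import org.apache.spark.launcher.SparkLauncher

// Placeholder app jar, main class, and dispatcher address; "cluster" deploy mode
// hands the driver to the MesosClusterDispatcher to run inside the cluster.
val sparkProcess = new SparkLauncher()
  .setMaster("mesos://dispatcher:7077")
  .setDeployMode("cluster")
  .setAppResource("/path/to/app.jar")
  .setMainClass("com.example.MyApp")
  .launch()
sparkProcess.waitFor()
```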
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 2fe9ec3542..3de72bc016 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -112,8 +112,8 @@ You can optionally configure the cluster further by setting environment variable
<tr>
<td><code>SPARK_LOCAL_DIRS</code></td>
<td>
- Directory to use for "scratch" space in Spark, including map output files and RDDs that get
- stored on disk. This should be on a fast, local disk in your system. It can also be a
+ Directory to use for "scratch" space in Spark, including map output files and RDDs that get
+ stored on disk. This should be on a fast, local disk in your system. It can also be a
comma-separated list of multiple directories on different disks.
</td>
</tr>
@@ -341,23 +341,8 @@ Learn more about getting started with ZooKeeper [here](http://zookeeper.apache.o
**Configuration**
-In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env using this configuration:
-
-<table class="table">
- <tr><th style="width:21%">System property</th><th>Meaning</th></tr>
- <tr>
- <td><code>spark.deploy.recoveryMode</code></td>
- <td>Set to ZOOKEEPER to enable standby Master recovery mode (default: NONE).</td>
- </tr>
- <tr>
- <td><code>spark.deploy.zookeeper.url</code></td>
- <td>The ZooKeeper cluster url (e.g., 192.168.1.100:2181,192.168.1.101:2181).</td>
- </tr>
- <tr>
- <td><code>spark.deploy.zookeeper.dir</code></td>
- <td>The directory in ZooKeeper to store recovery state (default: /spark).</td>
- </tr>
-</table>
+In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring `spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations.
+For more information about these configurations please refer to the configuration [doc](configuration.html#deploy).
Possible gotcha: If you have multiple Masters in your cluster but fail to correctly configure the Masters to use ZooKeeper, the Masters will fail to discover each other and think they're all leaders. This will not lead to a healthy cluster state (as all Masters will schedule independently).