author     Ilya Ganelin <ilya.ganelin@capitalone.com>    2015-07-01 23:11:02 -0700
committer  Andrew Or <andrew@databricks.com>             2015-07-01 23:11:02 -0700
commit     3697232b7d438979cc119b2a364296b0eec4a16a (patch)
tree       178437d6c6d5ffac1560ce96d1d89f3ca0ba805e
parent     377ff4c9e8942882183d94698684824e9dc9f391 (diff)
[SPARK-3071] Increase default driver memory
I've updated default values in comments, documentation, and in the command line builder to be 1g, based on comments in the JIRA. I've also updated most usages to point at a single variable defined in the Utils.scala and JavaUtils.java files. This wasn't possible in all cases (R, shell scripts, etc.), but usage in most code now points at the same place. Please let me know if I've missed anything. Will the spark-shell use the value within the command line builder during instantiation?

Author: Ilya Ganelin <ilya.ganelin@capitalone.com>

Closes #7132 from ilganeli/SPARK-3071 and squashes the following commits:

4074164 [Ilya Ganelin] String fix
271610b [Ilya Ganelin] Merge branch 'SPARK-3071' of github.com:ilganeli/spark into SPARK-3071
273b6e9 [Ilya Ganelin] Test fix
fd67721 [Ilya Ganelin] Update JavaUtils.java
26cc177 [Ilya Ganelin] test fix
e5db35d [Ilya Ganelin] Fixed test failure
39732a1 [Ilya Ganelin] merge fix
a6f7deb [Ilya Ganelin] Created default value for DRIVER MEM in Utils that's now used in almost all locations instead of setting manually in each
09ad698 [Ilya Ganelin] Update SubmitRestProtocolSuite.scala
19b6f25 [Ilya Ganelin] Missed one doc update
2698a3d [Ilya Ganelin] Updated default value for driver memory
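The shape of the change, outside the diff: one canonical constant defined once at the lowest layer and re-exported upward. A minimal, hypothetical Scala sketch of that pattern follows (the *Sketch names are illustrative only; the real definitions appear in the JavaUtils.java and Utils.scala hunks below):

object JavaUtilsSketch {
  // Canonical default, defined once at the lowest layer (network/common),
  // mirroring the JavaUtils.DEFAULT_DRIVER_MEM_MB introduced by this patch.
  val DEFAULT_DRIVER_MEM_MB: Long = 1024
}

object UtilsSketch {
  // Higher layers re-export the same value instead of hard-coding 512 or 1024,
  // mirroring Utils.DEFAULT_DRIVER_MEM_MB.
  val DEFAULT_DRIVER_MEM_MB: Int = JavaUtilsSketch.DEFAULT_DRIVER_MEM_MB.toInt
}

object UsageSketch extends App {
  // Call sites interpolate the shared constant rather than a literal.
  println(s"--driver-memory default: ${UtilsSketch.DEFAULT_DRIVER_MEM_MB}M")
}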
-rw-r--r--  R/pkg/R/sparkR.R  2
-rwxr-xr-x  conf/spark-env.sh.template  2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala  5
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/Utils.scala  6
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala  10
-rw-r--r--  docs/configuration.md  4
-rw-r--r--  launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java  2
-rw-r--r--  launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala  2
-rw-r--r--  network/common/src/main/java/org/apache/spark/network/util/JavaUtils.java  6
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala  7
15 files changed, 35 insertions, 21 deletions
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 633b869f91..86233e01db 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -109,7 +109,7 @@ sparkR.init <- function(
return(get(".sparkRjsc", envir = .sparkREnv))
}
- sparkMem <- Sys.getenv("SPARK_MEM", "512m")
+ sparkMem <- Sys.getenv("SPARK_MEM", "1024m")
jars <- suppressWarnings(normalizePath(as.character(sparkJars)))
# Classpath separator is ";" on Windows
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 43c4288912..192d3ae091 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -22,7 +22,7 @@
# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2)
# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
-# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb)
+# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’)
# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job.
diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala
index 316e2d59f0..42d3296062 100644
--- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala
@@ -117,7 +117,7 @@ private[deploy] class ClientArguments(args: Array[String]) {
private[deploy] object ClientArguments {
val DEFAULT_CORES = 1
- val DEFAULT_MEMORY = 512 // MB
+ val DEFAULT_MEMORY = Utils.DEFAULT_DRIVER_MEM_MB // MB
val DEFAULT_SUPERVISE = false
def isValidJarUrl(s: String): Boolean = {
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index b7429a901e..73ab18332f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -461,8 +461,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
|Usage: spark-submit --status [submission ID] --master [spark://...]""".stripMargin)
outStream.println(command)
+ val mem_mb = Utils.DEFAULT_DRIVER_MEM_MB
outStream.println(
- """
+ s"""
|Options:
| --master MASTER_URL spark://host:port, mesos://host:port, yarn, or local.
| --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or
@@ -488,7 +489,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
| --properties-file FILE Path to a file from which to load extra properties. If not
| specified, this will look for conf/spark-defaults.conf.
|
- | --driver-memory MEM Memory for driver (e.g. 1000M, 2G) (Default: 512M).
+ | --driver-memory MEM Memory for driver (e.g. 1000M, 2G) (Default: ${mem_mb}M).
| --driver-java-options Extra Java options to pass to the driver.
| --driver-library-path Extra library path entries to pass to the driver.
| --driver-class-path Extra class path entries to pass to the driver. Note that
diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala
index 8198296eeb..868cc35d06 100644
--- a/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala
@@ -59,7 +59,7 @@ private[mesos] class MesosSubmitRequestServlet(
extends SubmitRequestServlet {
private val DEFAULT_SUPERVISE = false
- private val DEFAULT_MEMORY = 512 // mb
+ private val DEFAULT_MEMORY = Utils.DEFAULT_DRIVER_MEM_MB // mb
private val DEFAULT_CORES = 1.0
private val nextDriverNumber = new AtomicLong(0)
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
index 9678631da9..1d2ecab517 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala
@@ -164,7 +164,7 @@ private[worker] class WorkerArguments(args: Array[String], conf: SparkConf) {
}
}
// Leave out 1 GB for the operating system, but don't return a negative memory size
- math.max(totalMb - 1024, 512)
+ math.max(totalMb - 1024, Utils.DEFAULT_DRIVER_MEM_MB)
}
def checkWorkerMemory(): Unit = {
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index a7fc749a2b..944560a913 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -80,6 +80,12 @@ private[spark] object Utils extends Logging {
*/
val TEMP_DIR_SHUTDOWN_PRIORITY = 25
+ /**
+ * Define a default value for driver memory here since this value is referenced across the code
+ * base and nearly all files already use Utils.scala
+ */
+ val DEFAULT_DRIVER_MEM_MB = JavaUtils.DEFAULT_DRIVER_MEM_MB.toInt
+
private val MAX_DIR_CREATION_ATTEMPTS: Int = 10
@volatile private var localRootDirs: Array[String] = null
diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala
index 115ac0534a..725b8848bc 100644
--- a/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala
@@ -18,11 +18,11 @@
package org.apache.spark.deploy.rest
import java.lang.Boolean
-import java.lang.Integer
import org.json4s.jackson.JsonMethods._
import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.util.Utils
/**
* Tests for the REST application submission protocol.
@@ -93,7 +93,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite {
// optional fields
conf.set("spark.jars", "mayonnaise.jar,ketchup.jar")
conf.set("spark.files", "fireball.png")
- conf.set("spark.driver.memory", "512m")
+ conf.set("spark.driver.memory", s"${Utils.DEFAULT_DRIVER_MEM_MB}m")
conf.set("spark.driver.cores", "180")
conf.set("spark.driver.extraJavaOptions", " -Dslices=5 -Dcolor=mostly_red")
conf.set("spark.driver.extraClassPath", "food-coloring.jar")
@@ -126,7 +126,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite {
assert(newMessage.sparkProperties("spark.app.name") === "SparkPie")
assert(newMessage.sparkProperties("spark.jars") === "mayonnaise.jar,ketchup.jar")
assert(newMessage.sparkProperties("spark.files") === "fireball.png")
- assert(newMessage.sparkProperties("spark.driver.memory") === "512m")
+ assert(newMessage.sparkProperties("spark.driver.memory") === s"${Utils.DEFAULT_DRIVER_MEM_MB}m")
assert(newMessage.sparkProperties("spark.driver.cores") === "180")
assert(newMessage.sparkProperties("spark.driver.extraJavaOptions") ===
" -Dslices=5 -Dcolor=mostly_red")
@@ -230,7 +230,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite {
""".stripMargin
private val submitDriverRequestJson =
- """
+ s"""
|{
| "action" : "CreateSubmissionRequest",
| "appArgs" : [ "two slices", "a hint of cinnamon" ],
@@ -246,7 +246,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite {
| "spark.driver.supervise" : "false",
| "spark.app.name" : "SparkPie",
| "spark.cores.max" : "10000",
- | "spark.driver.memory" : "512m",
+ | "spark.driver.memory" : "${Utils.DEFAULT_DRIVER_MEM_MB}m",
| "spark.files" : "fireball.png",
| "spark.driver.cores" : "180",
| "spark.driver.extraJavaOptions" : " -Dslices=5 -Dcolor=mostly_red",
diff --git a/docs/configuration.md b/docs/configuration.md
index affcd21514..bebaf6f62e 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -137,10 +137,10 @@ of the most common options to set are:
</tr>
<tr>
<td><code>spark.driver.memory</code></td>
- <td>512m</td>
+ <td>1g</td>
<td>
Amount of memory to use for the driver process, i.e. where SparkContext is initialized.
- (e.g. <code>512m</code>, <code>2g</code>).
+ (e.g. <code>1g</code>, <code>2g</code>).
<br /><em>Note:</em> In client mode, this config must not be set through the <code>SparkConf</code>
directly in your application, because the driver JVM has already started at that point.
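A sketch of why the client-mode note above matters: by the time application code runs, the driver JVM's heap is already fixed, so setting spark.driver.memory via SparkConf is too late. Illustrative only; the app name and value are made up.

import org.apache.spark.SparkConf

object ClientModeCaveatSketch extends App {
  val conf = new SparkConf()
    .setAppName("example")
    // In client mode this has no effect on the current JVM's heap:
    // the driver JVM was already launched before this line runs.
    .set("spark.driver.memory", "4g")
  // The value must instead be supplied before launch, e.g. via
  //   spark-submit --driver-memory 4g ...
  // or spark.driver.memory in conf/spark-defaults.conf.
  println(conf.get("spark.driver.memory"))
}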
diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
index 2665a700fe..a16c0d2b5c 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
@@ -27,7 +27,7 @@ import java.util.Map;
*/
class CommandBuilderUtils {
- static final String DEFAULT_MEM = "512m";
+ static final String DEFAULT_MEM = "1g";
static final String DEFAULT_PROPERTIES_FILE = "spark-defaults.conf";
static final String ENV_SPARK_HOME = "SPARK_HOME";
static final String ENV_SPARK_ASSEMBLY = "_SPARK_ASSEMBLY";
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index 3e5a2820b6..87c43aa998 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -208,7 +208,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
// - properties file.
// - SPARK_DRIVER_MEMORY env variable
// - SPARK_MEM env variable
- // - default value (512m)
+ // - default value (1g)
// Take Thrift Server as daemon
String tsMemory =
isThriftServer(mainClass) ? System.getenv("SPARK_DAEMON_MEMORY") : null;
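The comment block in the hunk above lists the resolution order for the driver's heap size. Below is a minimal sketch of that precedence chain under simplified assumptions (the real logic lives in SparkSubmitCommandBuilder.java; fromPropertiesFile is a hypothetical stand-in for the loaded properties-file value):

object DriverMemoryPrecedenceSketch extends App {
  // Mirrors CommandBuilderUtils.DEFAULT_MEM after this change.
  val DEFAULT_MEM = "1g"

  def firstNonEmpty(candidates: Option[String]*): Option[String] =
    candidates.flatten.find(_.nonEmpty)

  // Stand-in for the spark.driver.memory entry from the properties file.
  val fromPropertiesFile: Option[String] = None

  val memory = firstNonEmpty(
      fromPropertiesFile,
      sys.env.get("SPARK_DRIVER_MEMORY"),
      sys.env.get("SPARK_MEM"))
    .getOrElse(DEFAULT_MEM)

  println(s"-Xmx$memory")
}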
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index 25bb1453db..f2c78bbabf 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -198,7 +198,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
val driverMemory = sc.getConf.getOption("spark.driver.memory")
.orElse(Option(System.getenv("SPARK_DRIVER_MEMORY")))
.map(Utils.memoryStringToMb)
- .getOrElse(512)
+ .getOrElse(Utils.DEFAULT_DRIVER_MEM_MB)
if (driverMemory <= memThreshold) {
logWarning(s"$thisClassName.save() was called, but it may fail because of too little" +
s" driver memory (${driverMemory}m)." +
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
index 1e3333d8d8..905c5fb42b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
@@ -387,7 +387,7 @@ private[tree] object TreeEnsembleModel extends Logging {
val driverMemory = sc.getConf.getOption("spark.driver.memory")
.orElse(Option(System.getenv("SPARK_DRIVER_MEMORY")))
.map(Utils.memoryStringToMb)
- .getOrElse(512)
+ .getOrElse(Utils.DEFAULT_DRIVER_MEM_MB)
if (driverMemory <= memThreshold) {
logWarning(s"$className.save() was called, but it may fail because of too little" +
s" driver memory (${driverMemory}m)." +
diff --git a/network/common/src/main/java/org/apache/spark/network/util/JavaUtils.java b/network/common/src/main/java/org/apache/spark/network/util/JavaUtils.java
index 6b514aaa12..7d27439cfd 100644
--- a/network/common/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/network/common/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -39,6 +39,12 @@ import org.slf4j.LoggerFactory;
public class JavaUtils {
private static final Logger logger = LoggerFactory.getLogger(JavaUtils.class);
+ /**
+ * Define a default value for driver memory here since this value is referenced across the code
+ * base and nearly all files already use Utils.scala
+ */
+ public static final long DEFAULT_DRIVER_MEM_MB = 1024;
+
/** Closes the given object, ignoring IOExceptions. */
public static void closeQuietly(Closeable closeable) {
try {
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index 35e990602a..19d1bbff99 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -46,7 +46,7 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
var keytab: String = null
def isClusterMode: Boolean = userClass != null
- private var driverMemory: Int = 512 // MB
+ private var driverMemory: Int = Utils.DEFAULT_DRIVER_MEM_MB // MB
private var driverCores: Int = 1
private val driverMemOverheadKey = "spark.yarn.driver.memoryOverhead"
private val amMemKey = "spark.yarn.am.memory"
@@ -262,8 +262,9 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
private def getUsageMessage(unknownParam: List[String] = null): String = {
val message = if (unknownParam != null) s"Unknown/unsupported param $unknownParam\n" else ""
+ val mem_mb = Utils.DEFAULT_DRIVER_MEM_MB
message +
- """
+ s"""
|Usage: org.apache.spark.deploy.yarn.Client [options]
|Options:
| --jar JAR_PATH Path to your application's JAR file (required in yarn-cluster
@@ -275,7 +276,7 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
| Multiple invocations are possible, each will be passed in order.
| --num-executors NUM Number of executors to start (Default: 2)
| --executor-cores NUM Number of cores per executor (Default: 1).
- | --driver-memory MEM Memory for driver (e.g. 1000M, 2G) (Default: 512 Mb)
+ | --driver-memory MEM Memory for driver (e.g. 1000M, 2G) (Default: $mem_mb Mb)
| --driver-cores NUM Number of cores used by the driver (Default: 1).
| --executor-memory MEM Memory per executor (e.g. 1000M, 2G) (Default: 1G)
| --name NAME The name of your application (Default: Spark)