author     felixcheung <felixcheung_m@hotmail.com>           2015-10-30 13:51:32 -0700
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>  2015-10-30 13:51:32 -0700
commit     bb5a2af034196620d869fc9b1a400e014e718b8c (patch)
tree       55df31e52b9dea29ec7061e2e1e66db6b7199018 /R
parent     729f983e66cf65da2e8f48c463ccde2b355240c4 (diff)
[SPARK-11340][SPARKR] Support setting driver properties when starting Spark from R programmatically or from RStudio
Mapping spark.driver.memory from sparkEnvir to spark-submit command-line arguments.

shivaram suggested that we possibly add other spark.driver.* properties - do we want to add all of those? I thought those could be set in SparkConf? sun-rui

Author: felixcheung <felixcheung_m@hotmail.com>

Closes #9290 from felixcheung/rdrivermem.
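A hedged usage sketch of what this change enables when starting SparkR programmatically or from RStudio (not part of the patch; the paths and the "2g" value are illustrative assumptions):

# Hypothetical example: SPARK_HOME location and memory value are assumptions.
Sys.setenv(SPARK_HOME = "/path/to/spark")
library(SparkR, lib.loc = file.path(Sys.getenv("SPARK_HOME"), "R", "lib"))
# spark.driver.memory set in sparkEnvir is now forwarded to spark-submit as
# --driver-memory, so it applies before the driver JVM starts.
sc <- sparkR.init(master = "local[2]", appName = "SparkR",
                  sparkEnvir = list(spark.driver.memory = "2g"))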
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/R/sparkR.R                 45
-rw-r--r--  R/pkg/inst/tests/test_context.R  27
2 files changed, 67 insertions(+), 5 deletions(-)
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 043b0057bd..004d08e74e 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -77,7 +77,9 @@ sparkR.stop <- function() {
#' Initialize a new Spark Context.
#'
-#' This function initializes a new SparkContext.
+#' This function initializes a new SparkContext. For details on how to initialize
+#' and use SparkR, refer to the SparkR programming guide at
+#' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparkcontext-sqlcontext}.
#'
#' @param master The Spark master URL.
#' @param appName Application name to register with cluster manager
@@ -93,7 +95,7 @@ sparkR.stop <- function() {
#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
#' list(spark.executor.memory="1g"))
#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
-#' list(spark.executor.memory="1g"),
+#' list(spark.executor.memory="4g"),
#' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
#' c("jarfile1.jar","jarfile2.jar"))
#'}
@@ -123,16 +125,21 @@ sparkR.init <- function(
uriSep <- "////"
}
+ sparkEnvirMap <- convertNamedListToEnv(sparkEnvir)
+
existingPort <- Sys.getenv("EXISTING_SPARKR_BACKEND_PORT", "")
if (existingPort != "") {
backendPort <- existingPort
} else {
path <- tempfile(pattern = "backend_port")
+ submitOps <- getClientModeSparkSubmitOpts(
+ Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
+ sparkEnvirMap)
launchBackend(
args = path,
sparkHome = sparkHome,
jars = jars,
- sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
+ sparkSubmitOpts = submitOps,
packages = sparkPackages)
# wait at most 100 seconds for JVM to launch
wait <- 0.1
@@ -171,8 +178,6 @@ sparkR.init <- function(
sparkHome <- suppressWarnings(normalizePath(sparkHome))
}
- sparkEnvirMap <- convertNamedListToEnv(sparkEnvir)
-
sparkExecutorEnvMap <- convertNamedListToEnv(sparkExecutorEnv)
if(is.null(sparkExecutorEnvMap$LD_LIBRARY_PATH)) {
sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <-
@@ -320,3 +325,33 @@ clearJobGroup <- function(sc) {
cancelJobGroup <- function(sc, groupId) {
callJMethod(sc, "cancelJobGroup", groupId)
}
+
+sparkConfToSubmitOps <- new.env()
+sparkConfToSubmitOps[["spark.driver.memory"]] <- "--driver-memory"
+sparkConfToSubmitOps[["spark.driver.extraClassPath"]] <- "--driver-class-path"
+sparkConfToSubmitOps[["spark.driver.extraJavaOptions"]] <- "--driver-java-options"
+sparkConfToSubmitOps[["spark.driver.extraLibraryPath"]] <- "--driver-library-path"
+
+# Utility function that returns Spark Submit arguments as a string
+#
+# A few Spark application and runtime environment properties cannot take effect after the
+# driver JVM has started, as documented in:
+# http://spark.apache.org/docs/latest/configuration.html#application-properties
+# When starting SparkR without using spark-submit, for example from RStudio, add them to the
+# spark-submit command line if not already set in SPARKR_SUBMIT_ARGS so that they take effect.
+getClientModeSparkSubmitOpts <- function(submitOps, sparkEnvirMap) {
+ envirToOps <- lapply(ls(sparkConfToSubmitOps), function(conf) {
+ opsValue <- sparkEnvirMap[[conf]]
+ # process only if --option is not already specified
+ if (!is.null(opsValue) &&
+ nchar(opsValue) > 1 &&
+ !grepl(sparkConfToSubmitOps[[conf]], submitOps)) {
+ # put "" around value in case it has spaces
+ paste0(sparkConfToSubmitOps[[conf]], " \"", opsValue, "\" ")
+ } else {
+ ""
+ }
+ })
+ # --option must be before the application class "sparkr-shell" in submitOps
+ paste0(paste0(envirToOps, collapse = ""), submitOps)
+}
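As a quick sketch of what the new helper produces (assuming the definitions above are loaded; the values here are made up): ls() walks the whitelist alphabetically, and each matched property is prepended, quoted, ahead of the application class:

e <- new.env()
e[["spark.driver.memory"]] <- "2g"
e[["spark.driver.extraClassPath"]] <- "/opt/jars"
getClientModeSparkSubmitOpts("sparkr-shell", e)
# Expected: --driver-class-path "/opt/jars" --driver-memory "2g" sparkr-shell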
diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/test_context.R
index e99815ed15..80c1b89a4c 100644
--- a/R/pkg/inst/tests/test_context.R
+++ b/R/pkg/inst/tests/test_context.R
@@ -65,3 +65,30 @@ test_that("job group functions can be called", {
cancelJobGroup(sc, "groupId")
clearJobGroup(sc)
})
+
+test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", {
+ e <- new.env()
+ e[["spark.driver.memory"]] <- "512m"
+ ops <- getClientModeSparkSubmitOpts("sparkrmain", e)
+ expect_equal("--driver-memory \"512m\" sparkrmain", ops)
+
+ e[["spark.driver.memory"]] <- "5g"
+ e[["spark.driver.extraClassPath"]] <- "/opt/class_path" # nolint
+ e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings"
+ e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" # nolint
+ e[["random"]] <- "skipthis"
+ ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e)
+ # nolint start
+ expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"",
+ "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"",
+ "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell"))
+ # nolint end
+
+ e[["spark.driver.extraClassPath"]] <- "/" # too short
+ ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e)
+ # nolint start
+ expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ",
+ "-XX:+UseCompressedStrings\" --driver-library-path \"/usr/local/hadoop/lib\"",
+ " --driver-memory 4g sparkr-shell2"))
+ # nolint end
+})
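One more hedged sketch (illustrative values) of the interplay with SPARKR_SUBMIT_ARGS: because the helper grepls for each --option before appending it, a flag already present in the submit args wins over the corresponding sparkEnvir entry, as the ops3 test above also exercises:

e <- new.env()
e[["spark.driver.memory"]] <- "2g"
# --driver-memory is already in the submit args, so the "2g" entry is skipped.
getClientModeSparkSubmitOpts("--driver-memory 8g sparkr-shell", e)
# Expected: --driver-memory 8g sparkr-shell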