diff options
author | felixcheung <felixcheung_m@hotmail.com> | 2015-10-30 13:51:32 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-10-30 13:51:32 -0700 |
commit | bb5a2af034196620d869fc9b1a400e014e718b8c (patch) | |
tree | 55df31e52b9dea29ec7061e2e1e66db6b7199018 /R/pkg | |
parent | 729f983e66cf65da2e8f48c463ccde2b355240c4 (diff) | |
download | spark-bb5a2af034196620d869fc9b1a400e014e718b8c.tar.gz spark-bb5a2af034196620d869fc9b1a400e014e718b8c.tar.bz2 spark-bb5a2af034196620d869fc9b1a400e014e718b8c.zip |
[SPARK-11340][SPARKR] Support setting driver properties when starting Spark from R programmatically or from RStudio
Mapping spark.driver.memory from sparkEnvir to spark-submit command-line arguments.
shivaram suggested that we possibly add other spark.driver.* properties - do we want to add all of those? I thought those could be set in SparkConf?
sun-rui
Author: felixcheung <felixcheung_m@hotmail.com>
Closes #9290 from felixcheung/rdrivermem.
Diffstat (limited to 'R/pkg')
-rw-r--r-- | R/pkg/R/sparkR.R | 45 | ||||
-rw-r--r-- | R/pkg/inst/tests/test_context.R | 27 |
2 files changed, 67 insertions, 5 deletions
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 043b0057bd..004d08e74e 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -77,7 +77,9 @@ sparkR.stop <- function() { #' Initialize a new Spark Context. #' -#' This function initializes a new SparkContext. +#' This function initializes a new SparkContext. For details on how to initialize +#' and use SparkR, refer to SparkR programming guide at +#' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparkcontext-sqlcontext}. #' #' @param master The Spark master URL. #' @param appName Application name to register with cluster manager @@ -93,7 +95,7 @@ sparkR.stop <- function() { #' sc <- sparkR.init("local[2]", "SparkR", "/home/spark", #' list(spark.executor.memory="1g")) #' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark", -#' list(spark.executor.memory="1g"), +#' list(spark.executor.memory="4g"), #' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"), #' c("jarfile1.jar","jarfile2.jar")) #'} @@ -123,16 +125,21 @@ sparkR.init <- function( uriSep <- "////" } + sparkEnvirMap <- convertNamedListToEnv(sparkEnvir) + existingPort <- Sys.getenv("EXISTING_SPARKR_BACKEND_PORT", "") if (existingPort != "") { backendPort <- existingPort } else { path <- tempfile(pattern = "backend_port") + submitOps <- getClientModeSparkSubmitOpts( + Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"), + sparkEnvirMap) launchBackend( args = path, sparkHome = sparkHome, jars = jars, - sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"), + sparkSubmitOpts = submitOps, packages = sparkPackages) # wait atmost 100 seconds for JVM to launch wait <- 0.1 @@ -171,8 +178,6 @@ sparkR.init <- function( sparkHome <- suppressWarnings(normalizePath(sparkHome)) } - sparkEnvirMap <- convertNamedListToEnv(sparkEnvir) - sparkExecutorEnvMap <- convertNamedListToEnv(sparkExecutorEnv) if(is.null(sparkExecutorEnvMap$LD_LIBRARY_PATH)) { sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- @@ -320,3 +325,33 @@ 
clearJobGroup <- function(sc) { cancelJobGroup <- function(sc, groupId) { callJMethod(sc, "cancelJobGroup", groupId) } + +sparkConfToSubmitOps <- new.env() +sparkConfToSubmitOps[["spark.driver.memory"]] <- "--driver-memory" +sparkConfToSubmitOps[["spark.driver.extraClassPath"]] <- "--driver-class-path" +sparkConfToSubmitOps[["spark.driver.extraJavaOptions"]] <- "--driver-java-options" +sparkConfToSubmitOps[["spark.driver.extraLibraryPath"]] <- "--driver-library-path" + +# Utility function that returns Spark Submit arguments as a string +# +# A few Spark Application and Runtime environment properties cannot take effect after driver +# JVM has started, as documented in: +# http://spark.apache.org/docs/latest/configuration.html#application-properties +# When starting SparkR without using spark-submit, for example, from Rstudio, add them to +# spark-submit commandline if not already set in SPARKR_SUBMIT_ARGS so that they can be effective. +getClientModeSparkSubmitOpts <- function(submitOps, sparkEnvirMap) { + envirToOps <- lapply(ls(sparkConfToSubmitOps), function(conf) { + opsValue <- sparkEnvirMap[[conf]] + # process only if --option is not already specified + if (!is.null(opsValue) && + nchar(opsValue) > 1 && + !grepl(sparkConfToSubmitOps[[conf]], submitOps)) { + # put "" around value in case it has spaces + paste0(sparkConfToSubmitOps[[conf]], " \"", opsValue, "\" ") + } else { + "" + } + }) + # --option must be before the application class "sparkr-shell" in submitOps + paste0(paste0(envirToOps, collapse = ""), submitOps) +} diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/test_context.R index e99815ed15..80c1b89a4c 100644 --- a/R/pkg/inst/tests/test_context.R +++ b/R/pkg/inst/tests/test_context.R @@ -65,3 +65,30 @@ test_that("job group functions can be called", { cancelJobGroup(sc, "groupId") clearJobGroup(sc) }) + +test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", { + e <- new.env() + e[["spark.driver.memory"]] <- 
"512m" + ops <- getClientModeSparkSubmitOpts("sparkrmain", e) + expect_equal("--driver-memory \"512m\" sparkrmain", ops) + + e[["spark.driver.memory"]] <- "5g" + e[["spark.driver.extraClassPath"]] <- "/opt/class_path" # nolint + e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings" + e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" # nolint + e[["random"]] <- "skipthis" + ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e) + # nolint start + expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"", + "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"", + "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell")) + # nolint end + + e[["spark.driver.extraClassPath"]] <- "/" # too short + ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e) + # nolint start + expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ", + "-XX:+UseCompressedStrings\" --driver-library-path \"/usr/local/hadoop/lib\"", + " --driver-memory 4g sparkr-shell2")) + # nolint end +}) |