From 12b3e8d2e02788c3bebfecdd69755e94d80011c9 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Tue, 25 Oct 2016 21:42:59 -0700 Subject: [SPARK-18007][SPARKR][ML] update SparkR MLP - add initialWeights parameter ## What changes were proposed in this pull request? Update SparkR MLP: add the initialWeights parameter. ## How was this patch tested? Unit tests added. Author: WeichenXu Closes #15552 from WeichenXu123/mlp_r_add_initialWeight_param. --- R/pkg/R/mllib.R | 14 ++++++++++---- R/pkg/inst/tests/testthat/test_mllib.R | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'R/pkg') diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index b901307f8f..bf182be8e2 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -665,6 +665,8 @@ setMethod("predict", signature(object = "KMeansModel"), #' @param tol convergence tolerance of iterations. #' @param stepSize stepSize parameter. #' @param seed seed parameter for weights initialization. +#' @param initialWeights initialWeights parameter for weights initialization, it should be a +#' numeric vector. #' @param ... additional arguments passed to the method. #' @return \code{spark.mlp} returns a fitted Multilayer Perceptron Classification Model. 
#' @rdname spark.mlp @@ -677,8 +679,9 @@ setMethod("predict", signature(object = "KMeansModel"), #' df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm") #' #' # fit a Multilayer Perceptron Classification Model -#' model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", -#' maxIter = 100, tol = 0.5, stepSize = 1, seed = 1) +#' model <- spark.mlp(df, blockSize = 128, layers = c(4, 3), solver = "l-bfgs", +#' maxIter = 100, tol = 0.5, stepSize = 1, seed = 1, +#' initialWeights = c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9)) #' #' # get the summary of the model #' summary(model) @@ -695,7 +698,7 @@ setMethod("predict", signature(object = "KMeansModel"), #' @note spark.mlp since 2.1.0 setMethod("spark.mlp", signature(data = "SparkDataFrame"), function(data, layers, blockSize = 128, solver = "l-bfgs", maxIter = 100, - tol = 1E-6, stepSize = 0.03, seed = NULL) { + tol = 1E-6, stepSize = 0.03, seed = NULL, initialWeights = NULL) { if (is.null(layers)) { stop ("layers must be a integer vector with length > 1.") } @@ -706,10 +709,13 @@ setMethod("spark.mlp", signature(data = "SparkDataFrame"), if (!is.null(seed)) { seed <- as.character(as.integer(seed)) } + if (!is.null(initialWeights)) { + initialWeights <- as.array(as.numeric(na.omit(initialWeights))) + } jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper", "fit", data@sdf, as.integer(blockSize), as.array(layers), as.character(solver), as.integer(maxIter), as.numeric(tol), - as.numeric(stepSize), seed) + as.numeric(stepSize), seed, initialWeights) new("MultilayerPerceptronClassificationModel", jobj = jobj) }) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index c99315726a..33cc069f14 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -410,6 +410,21 @@ test_that("spark.mlp", { model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, 
seed = 10) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1)) + + # test initialWeights + model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights = + c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9)) + mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) + expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1)) + + model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights = + c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0)) + mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) + expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1)) + + model <- spark.mlp(df, layers = c(4, 3), maxIter = 2) + mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) + expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1)) }) test_that("spark.naiveBayes", { -- cgit v1.2.3