author    WeichenXu <WeichenXu123@outlook.com>  2016-10-25 21:42:59 -0700
committer Felix Cheung <felixcheung@apache.org>  2016-10-25 21:42:59 -0700
commit    12b3e8d2e02788c3bebfecdd69755e94d80011c9
tree      a8577ebadef6f612401fb7bd92d22d23f4a30ced /R/pkg
parent    c329a568b58d65c492a43926bf0f588f2ae6a66e
[SPARK-18007][SPARKR][ML] update SparkR MLP - add initialWeights parameter
## What changes were proposed in this pull request?

Update SparkR MLP: add the initialWeights parameter.

## How was this patch tested?

Test added.

Author: WeichenXu <WeichenXu123@outlook.com>

Closes #15552 from WeichenXu123/mlp_r_add_initialWeight_param.
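For readers skimming the patch, here is a minimal, hedged usage sketch of the new parameter. It mirrors the roxygen example added in this commit; the data path, layer sizes, and weight values are taken from that example, and it assumes an active SparkR session with Spark's sample data available.

```r
# Minimal sketch (assumptions: a SparkR session is running and Spark's sample
# libsvm data file is reachable at the path used in the roxygen example).
library(SparkR)
sparkR.session()

df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")

# layers = c(4, 3): 4 input features, 3 output classes. With one bias unit on
# the input layer, the network has (4 + 1) * 3 = 15 weights, so initialWeights
# supplies 15 values.
model <- spark.mlp(df, blockSize = 128, layers = c(4, 3), solver = "l-bfgs",
                   maxIter = 100, tol = 0.5, stepSize = 1, seed = 1,
                   initialWeights = c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
summary(model)
```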
Diffstat (limited to 'R/pkg')
-rw-r--r--  R/pkg/R/mllib.R  14
-rw-r--r--  R/pkg/inst/tests/testthat/test_mllib.R  15
2 files changed, 25 insertions, 4 deletions
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index b901307f8f..bf182be8e2 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -665,6 +665,8 @@ setMethod("predict", signature(object = "KMeansModel"),
#' @param tol convergence tolerance of iterations.
#' @param stepSize stepSize parameter.
#' @param seed seed parameter for weights initialization.
+#' @param initialWeights initialWeights parameter for weights initialization, it should be a
+#' numeric vector.
#' @param ... additional arguments passed to the method.
#' @return \code{spark.mlp} returns a fitted Multilayer Perceptron Classification Model.
#' @rdname spark.mlp
@@ -677,8 +679,9 @@ setMethod("predict", signature(object = "KMeansModel"),
#' df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
#'
#' # fit a Multilayer Perceptron Classification Model
-#' model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs",
-#' maxIter = 100, tol = 0.5, stepSize = 1, seed = 1)
+#' model <- spark.mlp(df, blockSize = 128, layers = c(4, 3), solver = "l-bfgs",
+#' maxIter = 100, tol = 0.5, stepSize = 1, seed = 1,
+#' initialWeights = c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
#'
#' # get the summary of the model
#' summary(model)
@@ -695,7 +698,7 @@ setMethod("predict", signature(object = "KMeansModel"),
#' @note spark.mlp since 2.1.0
setMethod("spark.mlp", signature(data = "SparkDataFrame"),
function(data, layers, blockSize = 128, solver = "l-bfgs", maxIter = 100,
- tol = 1E-6, stepSize = 0.03, seed = NULL) {
+ tol = 1E-6, stepSize = 0.03, seed = NULL, initialWeights = NULL) {
if (is.null(layers)) {
stop ("layers must be a integer vector with length > 1.")
}
@@ -706,10 +709,13 @@ setMethod("spark.mlp", signature(data = "SparkDataFrame"),
if (!is.null(seed)) {
seed <- as.character(as.integer(seed))
}
+ if (!is.null(initialWeights)) {
+ initialWeights <- as.array(as.numeric(na.omit(initialWeights)))
+ }
jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper",
"fit", data@sdf, as.integer(blockSize), as.array(layers),
as.character(solver), as.integer(maxIter), as.numeric(tol),
- as.numeric(stepSize), seed)
+ as.numeric(stepSize), seed, initialWeights)
new("MultilayerPerceptronClassificationModel", jobj = jobj)
})
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index c99315726a..33cc069f14 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -410,6 +410,21 @@ test_that("spark.mlp", {
model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1))
+
+ # test initialWeights
+ model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights =
+ c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
+ mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
+ expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1))
+
+ model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights =
+ c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0))
+ mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
+ expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1))
+
+ model <- spark.mlp(df, layers = c(4, 3), maxIter = 2)
+ mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
+ expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1))
})
test_that("spark.naiveBayes", {