about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- R/pkg/R/mllib.R | 13
-rw-r--r-- R/pkg/inst/tests/testthat/test_mllib.R | 19
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/r/MultilayerPerceptronClassifierWrapper.scala | 8
3 files changed, 33 insertions, 7 deletions
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 98db367a85..971c16658f 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -694,12 +694,19 @@ setMethod("predict", signature(object = "KMeansModel"),
#' }
#' @note spark.mlp since 2.1.0
setMethod("spark.mlp", signature(data = "SparkDataFrame"),
- function(data, blockSize = 128, layers = c(3, 5, 2), solver = "l-bfgs", maxIter = 100,
- tol = 0.5, stepSize = 1, seed = 1) {
+ function(data, layers, blockSize = 128, solver = "l-bfgs", maxIter = 100,
+ tol = 1E-6, stepSize = 0.03, seed = NULL) {
+ layers <- as.integer(na.omit(layers))
+ if (length(layers) <= 1) {
+ stop ("layers must be a integer vector with length > 1.")
+ }
+ if (!is.null(seed)) {
+ seed <- as.character(as.integer(seed))
+ }
jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper",
"fit", data@sdf, as.integer(blockSize), as.array(layers),
as.character(solver), as.integer(maxIter), as.numeric(tol),
- as.numeric(stepSize), as.integer(seed))
+ as.numeric(stepSize), seed)
new("MultilayerPerceptronClassificationModel", jobj = jobj)
})
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 24c40a8823..a1eaaf2091 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -391,6 +391,25 @@ test_that("spark.mlp", {
unlink(modelPath)
+ # Test default parameter
+ model <- spark.mlp(df, layers = c(4, 5, 4, 3))
+ mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
+ expect_equal(head(mlpPredictions$prediction, 10), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 0))
+
+ # Test illegal parameter
+ expect_error(spark.mlp(df, layers = NULL), "layers must be a integer vector with length > 1.")
+ expect_error(spark.mlp(df, layers = c()), "layers must be a integer vector with length > 1.")
+ expect_error(spark.mlp(df, layers = c(3)), "layers must be a integer vector with length > 1.")
+
+ # Test random seed
+ # default seed
+ model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10)
+ mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
+ expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 2, 0, 1))
+ # seed equals 10
+ model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10)
+ mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
+ expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1))
})
test_that("spark.naiveBayes", {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/MultilayerPerceptronClassifierWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/MultilayerPerceptronClassifierWrapper.scala
index be51e74187..1067300353 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/MultilayerPerceptronClassifierWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/MultilayerPerceptronClassifierWrapper.scala
@@ -53,26 +53,26 @@ private[r] object MultilayerPerceptronClassifierWrapper
def fit(
data: DataFrame,
blockSize: Int,
- layers: Array[Double],
+ layers: Array[Int],
solver: String,
maxIter: Int,
tol: Double,
stepSize: Double,
- seed: Int
+ seed: String
): MultilayerPerceptronClassifierWrapper = {
// get labels and feature names from output schema
val schema = data.schema
// assemble and fit the pipeline
val mlp = new MultilayerPerceptronClassifier()
- .setLayers(layers.map(_.toInt))
+ .setLayers(layers)
.setBlockSize(blockSize)
.setSolver(solver)
.setMaxIter(maxIter)
.setTol(tol)
.setStepSize(stepSize)
- .setSeed(seed)
.setPredictionCol(PREDICTED_LABEL_COL)
+ if (seed != null && seed.length > 0) mlp.setSeed(seed.toInt)
val pipeline = new Pipeline()
.setStages(Array(mlp))
.fit(data)