aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst/tests/testthat/test_mllib.R
diff options
context:
space:
mode:
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_mllib.R')
-rw-r--r--R/pkg/inst/tests/testthat/test_mllib.R63
1 files changed, 43 insertions, 20 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 07df4b6d6f..2a97a51cfa 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -371,12 +371,13 @@ test_that("spark.kmeans", {
test_that("spark.mlp", {
df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
source = "libsvm")
- model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", maxIter = 100,
- tol = 0.5, stepSize = 1, seed = 1)
+ model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3),
+ solver = "l-bfgs", maxIter = 100, tol = 0.5, stepSize = 1, seed = 1)
# Test summary method
summary <- summary(model)
- expect_equal(summary$labelCount, 3)
+ expect_equal(summary$numOfInputs, 4)
+ expect_equal(summary$numOfOutputs, 3)
expect_equal(summary$layers, c(4, 5, 4, 3))
expect_equal(length(summary$weights), 64)
expect_equal(head(summary$weights, 5), list(-0.878743, 0.2154151, -1.16304, -0.6583214, 1.009825),
@@ -385,7 +386,7 @@ test_that("spark.mlp", {
# Test predict method
mlpTestDF <- df
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
- expect_equal(head(mlpPredictions$prediction, 6), c(0, 1, 1, 1, 1, 1))
+ expect_equal(head(mlpPredictions$prediction, 6), c("1.0", "0.0", "0.0", "0.0", "0.0", "0.0"))
# Test model save/load
modelPath <- tempfile(pattern = "spark-mlp", fileext = ".tmp")
@@ -395,46 +396,68 @@ test_that("spark.mlp", {
model2 <- read.ml(modelPath)
summary2 <- summary(model2)
- expect_equal(summary2$labelCount, 3)
+ expect_equal(summary2$numOfInputs, 4)
+ expect_equal(summary2$numOfOutputs, 3)
expect_equal(summary2$layers, c(4, 5, 4, 3))
expect_equal(length(summary2$weights), 64)
unlink(modelPath)
# Test default parameter
- model <- spark.mlp(df, layers = c(4, 5, 4, 3))
+ model <- spark.mlp(df, label ~ features, layers = c(4, 5, 4, 3))
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
- expect_equal(head(mlpPredictions$prediction, 10), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 0))
+ expect_equal(head(mlpPredictions$prediction, 10),
+ c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
# Test illegal parameter
- expect_error(spark.mlp(df, layers = NULL), "layers must be a integer vector with length > 1.")
- expect_error(spark.mlp(df, layers = c()), "layers must be a integer vector with length > 1.")
- expect_error(spark.mlp(df, layers = c(3)), "layers must be a integer vector with length > 1.")
+ expect_error(spark.mlp(df, label ~ features, layers = NULL),
+ "layers must be a integer vector with length > 1.")
+ expect_error(spark.mlp(df, label ~ features, layers = c()),
+ "layers must be a integer vector with length > 1.")
+ expect_error(spark.mlp(df, label ~ features, layers = c(3)),
+ "layers must be a integer vector with length > 1.")
# Test random seed
# default seed
- model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10)
+ model <- spark.mlp(df, label ~ features, layers = c(4, 5, 4, 3), maxIter = 10)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
- expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 2, 0, 1))
+ expect_equal(head(mlpPredictions$prediction, 10),
+ c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
# seed equals 10
- model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10)
+ model <- spark.mlp(df, label ~ features, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
- expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1))
+ expect_equal(head(mlpPredictions$prediction, 10),
+ c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
# test initialWeights
- model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights =
+ model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2, initialWeights =
c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
- expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1))
+ expect_equal(head(mlpPredictions$prediction, 10),
+ c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
- model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights =
+ model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2, initialWeights =
c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0))
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
- expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1))
+ expect_equal(head(mlpPredictions$prediction, 10),
+ c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
- model <- spark.mlp(df, layers = c(4, 3), maxIter = 2)
+ model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
- expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1))
+ expect_equal(head(mlpPredictions$prediction, 10),
+ c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "2.0", "1.0", "0.0"))
+
+ # Test formula works well
+ df <- suppressWarnings(createDataFrame(iris))
+ model <- spark.mlp(df, Species ~ Sepal_Length + Sepal_Width + Petal_Length + Petal_Width,
+ layers = c(4, 3))
+ summary <- summary(model)
+ expect_equal(summary$numOfInputs, 4)
+ expect_equal(summary$numOfOutputs, 3)
+ expect_equal(summary$layers, c(4, 3))
+ expect_equal(length(summary$weights), 15)
+ expect_equal(head(summary$weights, 5), list(-1.1957257, -5.2693685, 7.4489734, -6.3751413,
+ -10.2376130), tolerance = 1e-6)
})
test_that("spark.naiveBayes", {