about | summary | refs | log | tree | commit | diff
path: root/R/pkg/inst
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2016-06-21 08:31:15 -0700
committer: Xiangrui Meng <meng@databricks.com> 2016-06-21 08:31:15 -0700
commit: 4f83ca1059a3b580fca3f006974ff5ac4d5212a1 (patch)
tree: 0d1c12a4c1f67574acdad034bc909e17117fa10b /R/pkg/inst
parent: f3a768b7b96f00f33d2fe4e6c0bf4acf373ad4f4 (diff)
download: spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.tar.gz
          spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.tar.bz2
          spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.zip
[SPARK-15177][.1][R] make SparkR model params and default values consistent with MLlib
## What changes were proposed in this pull request?

This PR is a subset of #13023 by yanboliang to make SparkR model param names and default values consistent with MLlib. I tried to avoid other changes from #13023 to keep this PR minimal. I will send a follow-up PR to improve the documentation.

Main changes:

* `spark.glm`: epsilon -> tol, maxit -> maxIter
* `spark.kmeans`: default k -> 2, default maxIter -> 20, default initMode -> "k-means||"
* `spark.naiveBayes`: laplace -> smoothing, default 1.0

## How was this patch tested?

Existing unit tests.

Author: Xiangrui Meng <meng@databricks.com>

Closes #13801 from mengxr/SPARK-15177.1.
Diffstat (limited to 'R/pkg/inst')
-rw-r--r--  R/pkg/inst/tests/testthat/test_mllib.R  4
1 file changed, 2 insertions, 2 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index c8c5ef2476..753da81760 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -288,7 +288,7 @@ test_that("spark.kmeans", {
take(training, 1)
- model <- spark.kmeans(data = training, ~ ., k = 2)
+ model <- spark.kmeans(data = training, ~ ., k = 2, maxIter = 10, initMode = "random")
sample <- take(select(predict(model, training), "prediction"), 1)
expect_equal(typeof(sample$prediction), "integer")
expect_equal(sample$prediction, 1)
@@ -363,7 +363,7 @@ test_that("spark.naiveBayes", {
t <- as.data.frame(Titanic)
t1 <- t[t$Freq > 0, -5]
df <- suppressWarnings(createDataFrame(t1))
- m <- spark.naiveBayes(df, Survived ~ .)
+ m <- spark.naiveBayes(df, Survived ~ ., smoothing = 0.0)
s <- summary(m)
expect_equal(as.double(s$apriori[1, "Yes"]), 0.5833333, tolerance = 1e-6)
expect_equal(sum(s$apriori), 1)