diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2016-04-30 08:37:56 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-04-30 08:37:56 -0700 |
commit | 19a6d192d53ce6dffe998ce110adab1f2efcb23e (patch) | |
tree | 6900926371373d8bb072d85441df7840918be1f9 /R/pkg/inst | |
parent | e5fb78baf9a6014b6dd02cf9f528d069732aafca (diff) | |
download | spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.gz spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.bz2 spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.zip |
[SPARK-15030][ML][SPARKR] Support formula in spark.kmeans in SparkR
## What changes were proposed in this pull request?
* ```RFormula``` supports an empty response variable, e.g. ```~ x + y```.
* Support formula in ```spark.kmeans``` in SparkR.
* Fix some outdated docs for SparkR.
## How was this patch tested?
Unit tests.
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #12813 from yanboliang/spark-15030.
Diffstat (limited to 'R/pkg/inst')
-rw-r--r-- | R/pkg/inst/tests/testthat/test_mllib.R | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index dcd0296a3c..37d87aa8a0 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -132,7 +132,7 @@ test_that("spark.glm save/load", {
   m <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
   s <- summary(m)
 
-  modelPath <- tempfile(pattern = "glm", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-glm", fileext = ".tmp")
   write.ml(m, modelPath)
   expect_error(write.ml(m, modelPath))
   write.ml(m, modelPath, overwrite = TRUE)
@@ -291,7 +291,7 @@ test_that("spark.kmeans", {
   take(training, 1)
 
-  model <- spark.kmeans(data = training, k = 2)
+  model <- spark.kmeans(data = training, ~ ., k = 2)
   sample <- take(select(predict(model, training), "prediction"), 1)
   expect_equal(typeof(sample$prediction), "integer")
   expect_equal(sample$prediction, 1)
@@ -310,7 +310,7 @@ test_that("spark.kmeans", {
   expect_equal(sort(collect(distinct(select(cluster, "prediction")))$prediction), c(0, 1))
 
   # Test model save/load
-  modelPath <- tempfile(pattern = "kmeans", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-kmeans", fileext = ".tmp")
   write.ml(model, modelPath)
   expect_error(write.ml(model, modelPath))
   write.ml(model, modelPath, overwrite = TRUE)
@@ -324,7 +324,7 @@ test_that("spark.kmeans", {
   unlink(modelPath)
 })
 
-test_that("naiveBayes", {
+test_that("spark.naiveBayes", {
   # R code to reproduce the result.
   # We do not support instance weights yet. So we ignore the frequencies.
   #
@@ -377,7 +377,7 @@ test_that("naiveBayes", {
                "Yes", "Yes", "No", "No"))
 
   # Test model save/load
-  modelPath <- tempfile(pattern = "naiveBayes", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-naiveBayes", fileext = ".tmp")
   write.ml(m, modelPath)
   expect_error(write.ml(m, modelPath))
   write.ml(m, modelPath, overwrite = TRUE)
@@ -434,7 +434,7 @@ test_that("spark.survreg", {
                2.390146, 2.891269, 2.891269), tolerance = 1e-4)
 
   # Test model save/load
-  modelPath <- tempfile(pattern = "survreg", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-survreg", fileext = ".tmp")
   write.ml(model, modelPath)
   expect_error(write.ml(model, modelPath))
   write.ml(model, modelPath, overwrite = TRUE)