diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2016-04-30 08:37:56 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-04-30 08:37:56 -0700 |
commit | 19a6d192d53ce6dffe998ce110adab1f2efcb23e (patch) | |
tree | 6900926371373d8bb072d85441df7840918be1f9 /R/pkg/inst | |
parent | e5fb78baf9a6014b6dd02cf9f528d069732aafca (diff) | |
download | spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.gz spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.bz2 spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.zip |
[SPARK-15030][ML][SPARKR] Support formula in spark.kmeans in SparkR
## What changes were proposed in this pull request?
* ```RFormula``` supports an empty response variable, e.g. ```~ x + y```.
* Support formula in ```spark.kmeans``` in SparkR.
* Fix some outdated docs for SparkR.
## How was this patch tested?
Unit tests.
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #12813 from yanboliang/spark-15030.
Diffstat (limited to 'R/pkg/inst')
-rw-r--r-- | R/pkg/inst/tests/testthat/test_mllib.R | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index dcd0296a3c..37d87aa8a0 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -132,7 +132,7 @@ test_that("spark.glm save/load", {
   m <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
   s <- summary(m)
 
-  modelPath <- tempfile(pattern = "glm", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-glm", fileext = ".tmp")
   write.ml(m, modelPath)
   expect_error(write.ml(m, modelPath))
   write.ml(m, modelPath, overwrite = TRUE)
@@ -291,7 +291,7 @@ test_that("spark.kmeans", {
   take(training, 1)
 
-  model <- spark.kmeans(data = training, k = 2)
+  model <- spark.kmeans(data = training, ~ ., k = 2)
   sample <- take(select(predict(model, training), "prediction"), 1)
   expect_equal(typeof(sample$prediction), "integer")
   expect_equal(sample$prediction, 1)
@@ -310,7 +310,7 @@ test_that("spark.kmeans", {
   expect_equal(sort(collect(distinct(select(cluster, "prediction")))$prediction), c(0, 1))
 
   # Test model save/load
-  modelPath <- tempfile(pattern = "kmeans", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-kmeans", fileext = ".tmp")
   write.ml(model, modelPath)
   expect_error(write.ml(model, modelPath))
   write.ml(model, modelPath, overwrite = TRUE)
@@ -324,7 +324,7 @@ test_that("spark.kmeans", {
   unlink(modelPath)
 })
 
-test_that("naiveBayes", {
+test_that("spark.naiveBayes", {
   # R code to reproduce the result.
   # We do not support instance weights yet. So we ignore the frequencies.
   #
@@ -377,7 +377,7 @@ test_that("naiveBayes", {
                "Yes", "Yes", "No", "No"))
 
   # Test model save/load
-  modelPath <- tempfile(pattern = "naiveBayes", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-naiveBayes", fileext = ".tmp")
   write.ml(m, modelPath)
   expect_error(write.ml(m, modelPath))
   write.ml(m, modelPath, overwrite = TRUE)
@@ -434,7 +434,7 @@ test_that("spark.survreg", {
                2.390146, 2.891269, 2.891269), tolerance = 1e-4)
 
   # Test model save/load
-  modelPath <- tempfile(pattern = "survreg", fileext = ".tmp")
+  modelPath <- tempfile(pattern = "spark-survreg", fileext = ".tmp")
   write.ml(model, modelPath)
   expect_error(write.ml(model, modelPath))
   write.ml(model, modelPath, overwrite = TRUE)