[SPARK-15030][ML][SPARKR] Support formula in spark.kmeans in SparkR

## What changes were proposed in this pull request? * ```RFormula``` supports empty response variable like ```~ x + y```. * Support formula in ```spark.kmeans``` in SparkR. * Fix some outdated docs for SparkR. ## How was this patch tested? Unit tests. Author: Yanbo Liang <ybliang8@gmail.com> Closes #12813 from yanboliang/spark-15030.
author: Yanbo Liang <ybliang8@gmail.com> 2016-04-30 08:37:56 -0700
committer: Xiangrui Meng <meng@databricks.com> 2016-04-30 08:37:56 -0700
commit: 19a6d192d53ce6dffe998ce110adab1f2efcb23e (patch)
tree: 6900926371373d8bb072d85441df7840918be1f9 /mllib/src/test/scala
parent: e5fb78baf9a6014b6dd02cf9f528d069732aafca (diff)
download: spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.gz
spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.bz2
spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.zip
1 files changed, 19 insertions, 0 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala
index e1b269b5b6..f8476953d8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala
@@ -23,6 +23,7 @@ import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.util.DefaultReadWriteTest
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.sql.Row
 
 class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
   test("params") {
@@ -89,6 +90,24 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
     assert(resultSchema.toString == model.transform(original).schema.toString)
   }
 
+  test("allow empty label") {
+    val original = sqlContext.createDataFrame(
+      Seq((1, 2.0, 3.0), (4, 5.0, 6.0), (7, 8.0, 9.0))
+    ).toDF("id", "a", "b")
+    val formula = new RFormula().setFormula("~ a + b")
+    val model = formula.fit(original)
+    val result = model.transform(original)
+    val resultSchema = model.transformSchema(original.schema)
+    val expected = sqlContext.createDataFrame(
+      Seq(
+        (1, 2.0, 3.0, Vectors.dense(2.0, 3.0)),
+        (4, 5.0, 6.0, Vectors.dense(5.0, 6.0)),
+        (7, 8.0, 9.0, Vectors.dense(8.0, 9.0)))
+      ).toDF("id", "a", "b", "features")
+    assert(result.schema.toString == resultSchema.toString)
+    assert(result.collect() === expected.collect())
+  }
+
   test("encodes string terms") {
     val formula = new RFormula().setFormula("id ~ a + b")
     val original = sqlContext.createDataFrame(
author	Yanbo Liang <ybliang8@gmail.com>	2016-04-30 08:37:56 -0700
committer	Xiangrui Meng <meng@databricks.com>	2016-04-30 08:37:56 -0700
commit	19a6d192d53ce6dffe998ce110adab1f2efcb23e (patch)
tree	6900926371373d8bb072d85441df7840918be1f9 /mllib/src/test/scala
parent	e5fb78baf9a6014b6dd02cf9f528d069732aafca (diff)
download	spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.gz spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.tar.bz2 spark-19a6d192d53ce6dffe998ce110adab1f2efcb23e.zip