aboutsummaryrefslogtreecommitdiff
path: root/R/pkg
diff options
context:
space:
mode:
authorEric Liang <ekl@databricks.com>2015-07-28 14:16:57 -0700
committerXiangrui Meng <meng@databricks.com>2015-07-28 14:16:57 -0700
commit8d5bb5283c3cc9180ef34b05be4a715d83073b1e (patch)
treec91d0261b5212032a129bcad4d772f1b183a7ea8 /R/pkg
parent6cdcc21fe654ac0a2d0d72783eb10005fc513af6 (diff)
downloadspark-8d5bb5283c3cc9180ef34b05be4a715d83073b1e.tar.gz
spark-8d5bb5283c3cc9180ef34b05be4a715d83073b1e.tar.bz2
spark-8d5bb5283c3cc9180ef34b05be4a715d83073b1e.zip
[SPARK-9391] [ML] Support minus, dot, and intercept operators in SparkR RFormula
Adds '.', '-', and intercept parsing to RFormula. Also splits RFormulaParser into a separate file. Umbrella design doc here: https://docs.google.com/document/d/10NZNSEurN2EdWM31uFYsgayIPfCFHiuIu3pCWrUmP_c/edit?usp=sharing mengxr Author: Eric Liang <ekl@databricks.com> Closes #7707 from ericl/string-features-2 and squashes the following commits: 8588625 [Eric Liang] exclude complex types for . 8106ffe [Eric Liang] comments a9350bb [Eric Liang] s/var/val 9c50d4d [Eric Liang] Merge branch 'string-features' into string-features-2 581afb2 [Eric Liang] Merge branch 'master' into string-features 08ae539 [Eric Liang] Merge branch 'string-features' into string-features-2 f99131a [Eric Liang] comments cecec43 [Eric Liang] Merge branch 'string-features' into string-features-2 0bf3c26 [Eric Liang] update docs 4592df2 [Eric Liang] intercept supports 7412a2e [Eric Liang] Fri Jul 24 14:56:51 PDT 2015 3cf848e [Eric Liang] fix the parser 0556c2b [Eric Liang] Merge branch 'string-features' into string-features-2 c302a2c [Eric Liang] fix tests 9d1ac82 [Eric Liang] Merge remote-tracking branch 'upstream/master' into string-features e713da3 [Eric Liang] comments cd231a9 [Eric Liang] Wed Jul 22 17:18:44 PDT 2015 4d79193 [Eric Liang] revert to seq + distinct 169a085 [Eric Liang] tweak functional test a230a47 [Eric Liang] Merge branch 'master' into string-features 72bd6f3 [Eric Liang] fix merge d841cec [Eric Liang] Merge branch 'master' into string-features 5b2c4a2 [Eric Liang] Mon Jul 20 18:45:33 PDT 2015 b01c7c5 [Eric Liang] add test 8a637db [Eric Liang] encoder wip a1d03f4 [Eric Liang] refactor into estimator
Diffstat (limited to 'R/pkg')
-rw-r--r--R/pkg/R/mllib.R2
-rw-r--r--R/pkg/inst/tests/test_mllib.R8
2 files changed, 9 insertions, 1 deletions
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 258e354081..6a8bacaa55 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -27,7 +27,7 @@ setClass("PipelineModel", representation(model = "jobj"))
#' Fits a generalized linear model, similarly to R's glm(). Also see the glmnet package.
#'
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
-#' operators are supported, including '~' and '+'.
+#' operators are supported, including '~', '+', '-', and '.'.
#' @param data DataFrame for training
#' @param family Error distribution. "gaussian" -> linear regression, "binomial" -> logistic reg.
#' @param lambda Regularization parameter
diff --git a/R/pkg/inst/tests/test_mllib.R b/R/pkg/inst/tests/test_mllib.R
index 29152a1168..3bef693247 100644
--- a/R/pkg/inst/tests/test_mllib.R
+++ b/R/pkg/inst/tests/test_mllib.R
@@ -40,3 +40,11 @@ test_that("predictions match with native glm", {
rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
})
+
+test_that("dot minus and intercept vs native glm", {
+ training <- createDataFrame(sqlContext, iris)
+ model <- glm(Sepal_Width ~ . - Species + 0, data = training)
+ vals <- collect(select(predict(model, training), "prediction"))
+ rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
+ expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+})