From e328b69c31821e4b27673d7ef6182ab3b7a05ca8 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Wed, 4 Nov 2015 08:28:33 -0800 Subject: [SPARK-9492][ML][R] LogisticRegression in R should provide model statistics Like ml ```LinearRegression```, ```LogisticRegression``` should provide a training summary including feature names and their coefficients. Author: Yanbo Liang Closes #9303 from yanboliang/spark-9492. --- R/pkg/inst/tests/test_mllib.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'R/pkg/inst/tests/test_mllib.R') diff --git a/R/pkg/inst/tests/test_mllib.R b/R/pkg/inst/tests/test_mllib.R index 3331ce7383..032cfef061 100644 --- a/R/pkg/inst/tests/test_mllib.R +++ b/R/pkg/inst/tests/test_mllib.R @@ -67,3 +67,20 @@ test_that("summary coefficients match with native glm", { as.character(stats$features) == c("(Intercept)", "Sepal_Length", "Species_versicolor", "Species_virginica"))) }) + +test_that("summary coefficients match with native glm of family 'binomial'", { + df <- createDataFrame(sqlContext, iris) + training <- filter(df, df$Species != "setosa") + stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training, + family = "binomial")) + coefs <- as.vector(stats$coefficients) + + rTraining <- iris[iris$Species %in% c("versicolor","virginica"),] + rCoefs <- as.vector(coef(glm(Species ~ Sepal.Length + Sepal.Width, data = rTraining, + family = binomial(link = "logit")))) + + expect_true(all(abs(rCoefs - coefs) < 1e-4)) + expect_true(all( + as.character(stats$features) == + c("(Intercept)", "Sepal_Length", "Sepal_Width"))) +}) -- cgit v1.2.3