aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
Diffstat (limited to 'R')
-rw-r--r--R/pkg/NAMESPACE3
-rw-r--r--R/pkg/R/mllib.R49
-rw-r--r--R/pkg/inst/tests/testthat/test_mllib.R49
3 files changed, 97 insertions, 4 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index f48c61c1d5..94ac7e7df7 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -292,7 +292,8 @@ export("as.DataFrame",
"tableToDF",
"tableNames",
"tables",
- "uncacheTable")
+ "uncacheTable",
+ "print.summary.GeneralizedLinearRegressionModel")
export("structField",
"structField.jobj",
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 31bca16580..922a9b13db 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -101,12 +101,55 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
jobj <- object@jobj
features <- callJMethod(jobj, "rFeatures")
coefficients <- callJMethod(jobj, "rCoefficients")
- coefficients <- as.matrix(unlist(coefficients))
- colnames(coefficients) <- c("Estimate")
+ deviance.resid <- callJMethod(jobj, "rDevianceResiduals")
+ dispersion <- callJMethod(jobj, "rDispersion")
+ null.deviance <- callJMethod(jobj, "rNullDeviance")
+ deviance <- callJMethod(jobj, "rDeviance")
+ df.null <- callJMethod(jobj, "rResidualDegreeOfFreedomNull")
+ df.residual <- callJMethod(jobj, "rResidualDegreeOfFreedom")
+ aic <- callJMethod(jobj, "rAic")
+ iter <- callJMethod(jobj, "rNumIterations")
+ family <- callJMethod(jobj, "rFamily")
+
+ deviance.resid <- dataFrame(deviance.resid)
+ coefficients <- matrix(coefficients, ncol = 4)
+ colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
rownames(coefficients) <- unlist(features)
- return(list(coefficients = coefficients))
+ ans <- list(deviance.resid = deviance.resid, coefficients = coefficients,
+ dispersion = dispersion, null.deviance = null.deviance,
+ deviance = deviance, df.null = df.null, df.residual = df.residual,
+ aic = aic, iter = iter, family = family)
+ class(ans) <- "summary.GeneralizedLinearRegressionModel"
+ return(ans)
})
+#' Print the summary of GeneralizedLinearRegressionModel
+#'
+#' @rdname print
+#' @name print.summary.GeneralizedLinearRegressionModel
+#' @export
+print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
+ x$deviance.resid <- setNames(unlist(approxQuantile(x$deviance.resid, "devianceResiduals",
+ c(0.0, 0.25, 0.5, 0.75, 1.0), 0.01)), c("Min", "1Q", "Median", "3Q", "Max"))
+ x$deviance.resid <- zapsmall(x$deviance.resid, 5L)
+ cat("\nDeviance Residuals: \n")
+ cat("(Note: These are approximate quantiles with relative error <= 0.01)\n")
+ print.default(x$deviance.resid, digits = 5L, na.print = "", print.gap = 2L)
+
+ cat("\nCoefficients:\n")
+ print.default(x$coefficients, digits = 5L, na.print = "", print.gap = 2L)
+
+ cat("\n(Dispersion parameter for ", x$family, " family taken to be ", format(x$dispersion),
+ ")\n\n", apply(cbind(paste(format(c("Null", "Residual"), justify = "right"), "deviance:"),
+ format(unlist(x[c("null.deviance", "deviance")]), digits = 5L),
+ " on", format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"),
+ 1L, paste, collapse = " "), sep = "")
+ cat("AIC: ", format(x$aic, digits = 4L), "\n\n",
+ "Number of Fisher Scoring iterations: ", x$iter, "\n", sep = "")
+ cat("\n")
+ invisible(x)
+ }
+
#' Make predictions from a generalized linear model
#'
#' Makes predictions from a generalized linear model produced by glm(), similarly to R's predict().
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index a9dbd2bdc4..47bbf7e5bd 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -77,6 +77,55 @@ test_that("glm and predict", {
expect_equal(length(predict(lm(y ~ x))), 15)
})
+test_that("glm summary", {
+ # gaussian family
+ training <- suppressWarnings(createDataFrame(sqlContext, iris))
+ stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training))
+
+ rStats <- summary(glm(Sepal.Width ~ Sepal.Length + Species, data = iris))
+
+ coefs <- unlist(stats$coefficients)
+ rCoefs <- unlist(rStats$coefficients)
+ expect_true(all(abs(rCoefs - coefs) < 1e-4))
+ expect_true(all(
+ rownames(stats$coefficients) ==
+ c("(Intercept)", "Sepal_Length", "Species_versicolor", "Species_virginica")))
+ expect_equal(stats$dispersion, rStats$dispersion)
+ expect_equal(stats$null.deviance, rStats$null.deviance)
+ expect_equal(stats$deviance, rStats$deviance)
+ expect_equal(stats$df.null, rStats$df.null)
+ expect_equal(stats$df.residual, rStats$df.residual)
+ expect_equal(stats$aic, rStats$aic)
+
+ # binomial family
+ df <- suppressWarnings(createDataFrame(sqlContext, iris))
+ training <- df[df$Species %in% c("versicolor", "virginica"), ]
+ stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training,
+ family = binomial(link = "logit")))
+
+ rTraining <- iris[iris$Species %in% c("versicolor", "virginica"), ]
+ rStats <- summary(glm(Species ~ Sepal.Length + Sepal.Width, data = rTraining,
+ family = binomial(link = "logit")))
+
+ coefs <- unlist(stats$coefficients)
+ rCoefs <- unlist(rStats$coefficients)
+ expect_true(all(abs(rCoefs - coefs) < 1e-4))
+ expect_true(all(
+ rownames(stats$coefficients) ==
+ c("(Intercept)", "Sepal_Length", "Sepal_Width")))
+ expect_equal(stats$dispersion, rStats$dispersion)
+ expect_equal(stats$null.deviance, rStats$null.deviance)
+ expect_equal(stats$deviance, rStats$deviance)
+ expect_equal(stats$df.null, rStats$df.null)
+ expect_equal(stats$df.residual, rStats$df.residual)
+ expect_equal(stats$aic, rStats$aic)
+
+ # Test summary works on base GLM models
+ baseModel <- stats::glm(Sepal.Width ~ Sepal.Length + Species, data = iris)
+ baseSummary <- summary(baseModel)
+ expect_true(abs(baseSummary$deviance - 12.19313) < 1e-4)
+})
+
test_that("kmeans", {
newIris <- iris
newIris$Species <- NULL