aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
Diffstat (limited to 'R')
-rw-r--r--R/pkg/R/mllib.R21
-rw-r--r--R/pkg/inst/tests/testthat/test_mllib.R9
2 files changed, 25 insertions, 5 deletions
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 265e64e746..02bc6456de 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -278,8 +278,10 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat
#' @param object a fitted generalized linear model.
#' @return \code{summary} returns a summary object of the fitted model, a list of components
-#' including at least the coefficients, null/residual deviance, null/residual degrees
-#' of freedom, AIC and number of iterations IRLS takes.
+#' including at least the coefficients matrix (which includes coefficients, standard error
+#' of coefficients, t value and p value), null/residual deviance, null/residual degrees of
+#' freedom, AIC and number of iterations IRLS takes. If there are collinear columns
+#' in your data, the coefficients matrix only provides coefficients.
#'
#' @rdname spark.glm
#' @export
@@ -303,9 +305,18 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
} else {
dataFrame(callJMethod(jobj, "rDevianceResiduals"))
}
- coefficients <- matrix(coefficients, ncol = 4)
- colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
- rownames(coefficients) <- unlist(features)
+ # If the underlying WeightedLeastSquares uses the "normal" solver, we can provide
+ # coefficients, standard error of coefficients, t value and p value. Otherwise,
+ # the model is fitted by local "l-bfgs" and we can only provide coefficients.
+ if (length(features) == length(coefficients)) {
+ coefficients <- matrix(coefficients, ncol = 1)
+ colnames(coefficients) <- c("Estimate")
+ rownames(coefficients) <- unlist(features)
+ } else {
+ coefficients <- matrix(coefficients, ncol = 4)
+ colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
+ rownames(coefficients) <- unlist(features)
+ }
ans <- list(deviance.resid = deviance.resid, coefficients = coefficients,
dispersion = dispersion, null.deviance = null.deviance,
deviance = deviance, df.null = df.null, df.residual = df.residual,
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 2a97a51cfa..467e00cf79 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -169,6 +169,15 @@ test_that("spark.glm summary", {
df <- suppressWarnings(createDataFrame(data))
regStats <- summary(spark.glm(df, b ~ a1 + a2, regParam = 1.0))
expect_equal(regStats$aic, 14.00976, tolerance = 1e-4) # 14.00976 is from summary() result
+
+ # Test spark.glm works on collinear data
+ A <- matrix(c(1, 2, 3, 4, 2, 4, 6, 8), 4, 2)
+ b <- c(1, 2, 3, 4)
+ data <- as.data.frame(cbind(A, b))
+ df <- createDataFrame(data)
+ stats <- summary(spark.glm(df, b ~ . - 1))
+ coefs <- unlist(stats$coefficients)
+ expect_true(all(abs(c(0.5, 0.25) - coefs) < 1e-4))
})
test_that("spark.glm save/load", {