aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst
diff options
context:
space:
mode:
author: Yanbo Liang <ybliang8@gmail.com> 2016-11-22 19:17:48 -0800
committer: Yanbo Liang <ybliang8@gmail.com> 2016-11-22 19:17:48 -0800
commit: 982b82e32e0fc7d30c5d557944a79eb3e6d2da59 (patch)
tree: ad9a9b28a282ffe8a4fe06c65abba8acd3339fb3 /R/pkg/inst
parent: d0212eb0f22473ee5482fe98dafc24e16ffcfc63 (diff)
download: spark-982b82e32e0fc7d30c5d557944a79eb3e6d2da59.tar.gz
spark-982b82e32e0fc7d30c5d557944a79eb3e6d2da59.tar.bz2
spark-982b82e32e0fc7d30c5d557944a79eb3e6d2da59.zip
[SPARK-18501][ML][SPARKR] Fix spark.glm errors when fitting on collinear data
## What changes were proposed in this pull request?

* Fix SparkR `spark.glm` errors when fitting on collinear data; the `standard error of coefficients, t value and p value` are not available in this case.
* The Scala/Python GLM summary should throw an exception if users request the `standard error of coefficients, t value and p value` when the underlying WLS problem was solved by local "l-bfgs".

## How was this patch tested?

Added unit tests.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #15930 from yanboliang/spark-18501.
Diffstat (limited to 'R/pkg/inst')
-rw-r--r-- R/pkg/inst/tests/testthat/test_mllib.R | 9
1 file changed, 9 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 2a97a51cfa..467e00cf79 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -169,6 +169,15 @@ test_that("spark.glm summary", {
df <- suppressWarnings(createDataFrame(data))
regStats <- summary(spark.glm(df, b ~ a1 + a2, regParam = 1.0))
expect_equal(regStats$aic, 14.00976, tolerance = 1e-4) # 14.00976 is from summary() result
+
+ # Test spark.glm works on collinear data
+ A <- matrix(c(1, 2, 3, 4, 2, 4, 6, 8), 4, 2)
+ b <- c(1, 2, 3, 4)
+ data <- as.data.frame(cbind(A, b))
+ df <- createDataFrame(data)
+ stats <- summary(spark.glm(df, b ~ . - 1))
+ coefs <- unlist(stats$coefficients)
+ expect_true(all(abs(c(0.5, 0.25) - coefs) < 1e-4))
})
test_that("spark.glm save/load", {