aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst/tests
diff options
context:
space:
mode:
authoractuaryzhang <actuaryzhang10@gmail.com>2017-03-14 00:50:38 -0700
committerFelix Cheung <felixcheung@apache.org>2017-03-14 00:50:38 -0700
commitf6314eab4b494bd5b5e9e41c6f582d4f22c0967a (patch)
treeff067df4be9eb6f3b660abf8332136d778201146 /R/pkg/inst/tests
parent415f9f3423aacc395097e40427364c921a2ed7f1 (diff)
downloadspark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.tar.gz
spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.tar.bz2
spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.zip
[SPARK-19391][SPARKR][ML] Tweedie GLM API for SparkR
## What changes were proposed in this pull request? Port Tweedie GLM #16344 to SparkR felixcheung yanboliang ## How was this patch tested? new test in SparkR Author: actuaryzhang <actuaryzhang10@gmail.com> Closes #16729 from actuaryzhang/sparkRTweedie.
Diffstat (limited to 'R/pkg/inst/tests')
-rw-r--r--R/pkg/inst/tests/testthat/test_mllib_regression.R38
1 files changed, 37 insertions, 1 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib_regression.R b/R/pkg/inst/tests/testthat/test_mllib_regression.R
index 81a5bdc414..3e9ad77198 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_regression.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_regression.R
@@ -77,6 +77,24 @@ test_that("spark.glm and predict", {
out <- capture.output(print(summary(model)))
expect_true(any(grepl("Dispersion parameter for gamma family", out)))
+ # tweedie family
+ model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
+ family = "tweedie", var.power = 1.2, link.power = 0.0)
+ prediction <- predict(model, training)
+ expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+ vals <- collect(select(prediction, "prediction"))
+
+ # manual calculation of the R predicted values to avoid dependence on statmod
+ #' library(statmod)
+ #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
+ #' family = tweedie(var.power = 1.2, link.power = 0.0))
+ #' print(coef(rModel))
+
+ rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
+ rVals <- exp(as.numeric(model.matrix(Sepal.Width ~ Sepal.Length + Species,
+ data = iris) %*% rCoef))
+ expect_true(all(abs(rVals - vals) < 1e-5), rVals - vals)
+
# Test stats::predict is working
x <- rnorm(15)
y <- x + rnorm(15)
@@ -233,7 +251,7 @@ test_that("glm and predict", {
training <- suppressWarnings(createDataFrame(iris))
# gaussian family
model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
- prediction <- predict(model, training)
+ prediction <- predict(model, training)
expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
vals <- collect(select(prediction, "prediction"))
rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
@@ -249,6 +267,24 @@ test_that("glm and predict", {
data = iris, family = poisson(link = identity)), iris))
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+ # tweedie family
+ model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training,
+ family = "tweedie", var.power = 1.2, link.power = 0.0)
+ prediction <- predict(model, training)
+ expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+ vals <- collect(select(prediction, "prediction"))
+
+ # manual calculation of the R predicted values to avoid dependence on statmod
+ #' library(statmod)
+ #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
+ #' family = tweedie(var.power = 1.2, link.power = 0.0))
+ #' print(coef(rModel))
+
+ rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
+ rVals <- exp(as.numeric(model.matrix(Sepal.Width ~ Sepal.Length + Species,
+ data = iris) %*% rCoef))
+ expect_true(all(abs(rVals - vals) < 1e-5), rVals - vals)
+
# Test stats::predict is working
x <- rnorm(15)
y <- x + rnorm(15)