[SPARK-19391][SPARKR][ML] Tweedie GLM API for SparkR

## What changes were proposed in this pull request? Port Tweedie GLM #16344 to SparkR felixcheung yanboliang ## How was this patch tested? new test in SparkR Author: actuaryzhang <actuaryzhang10@gmail.com> Closes #16729 from actuaryzhang/sparkRTweedie.
author: actuaryzhang <actuaryzhang10@gmail.com> 2017-03-14 00:50:38 -0700
committer: Felix Cheung <felixcheung@apache.org> 2017-03-14 00:50:38 -0700
commit: f6314eab4b494bd5b5e9e41c6f582d4f22c0967a (patch)
tree: ff067df4be9eb6f3b660abf8332136d778201146 /R/pkg/inst/tests
parent: 415f9f3423aacc395097e40427364c921a2ed7f1 (diff)
download: spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.tar.gz
spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.tar.bz2
spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.zip
1 files changed, 37 insertions, 1 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib_regression.R b/R/pkg/inst/tests/testthat/test_mllib_regression.R
index 81a5bdc414..3e9ad77198 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_regression.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_regression.R
@@ -77,6 +77,24 @@ test_that("spark.glm and predict", {
   out <- capture.output(print(summary(model)))
   expect_true(any(grepl("Dispersion parameter for gamma family", out)))
 
+  # tweedie family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
+                     family = "tweedie", var.power = 1.2, link.power = 0.0)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+
+  # manual calculation of the R predicted values to avoid dependence on statmod
+  #' library(statmod)
+  #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
+  #'             family = tweedie(var.power = 1.2, link.power = 0.0))
+  #' print(coef(rModel))
+
+  rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
+  rVals <- exp(as.numeric(model.matrix(Sepal.Width ~ Sepal.Length + Species,
+                                       data = iris) %*% rCoef))
+  expect_true(all(abs(rVals - vals) < 1e-5), rVals - vals)
+
   # Test stats::predict is working
   x <- rnorm(15)
   y <- x + rnorm(15)
@@ -233,7 +251,7 @@ test_that("glm and predict", {
   training <- suppressWarnings(createDataFrame(iris))
   # gaussian family
   model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
-               prediction <- predict(model, training)
+  prediction <- predict(model, training)
   expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
   vals <- collect(select(prediction, "prediction"))
   rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
@@ -249,6 +267,24 @@ test_that("glm and predict", {
                                         data = iris, family = poisson(link = identity)), iris))
   expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
 
+  # tweedie family
+  model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training,
+               family = "tweedie", var.power = 1.2, link.power = 0.0)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+
+  # manual calculation of the R predicted values to avoid dependence on statmod
+  #' library(statmod)
+  #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
+  #'             family = tweedie(var.power = 1.2, link.power = 0.0))
+  #' print(coef(rModel))
+
+  rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
+  rVals <- exp(as.numeric(model.matrix(Sepal.Width ~ Sepal.Length + Species,
+                                   data = iris) %*% rCoef))
+  expect_true(all(abs(rVals - vals) < 1e-5), rVals - vals)
+
   # Test stats::predict is working
   x <- rnorm(15)
   y <- x + rnorm(15)
author	actuaryzhang <actuaryzhang10@gmail.com>	2017-03-14 00:50:38 -0700
committer	Felix Cheung <felixcheung@apache.org>	2017-03-14 00:50:38 -0700
commit	f6314eab4b494bd5b5e9e41c6f582d4f22c0967a (patch)
tree	ff067df4be9eb6f3b660abf8332136d778201146 /R/pkg/inst/tests
parent	415f9f3423aacc395097e40427364c921a2ed7f1 (diff)
download	spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.tar.gz spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.tar.bz2 spark-f6314eab4b494bd5b5e9e41c6f582d4f22c0967a.zip