aboutsummaryrefslogtreecommitdiff
path: root/mllib/src
diff options
context:
space:
mode:
author: actuaryzhang <actuaryzhang10@gmail.com> 2017-01-23 00:53:44 -0800
committer: Yanbo Liang <ybliang8@gmail.com> 2017-01-23 00:53:44 -0800
commit: f067acefabebf04939d03a639a2aaa654e1bc8f9 (patch)
tree: b002099eb0dc33f909077125bf734cca916c0b32 /mllib/src
parent: de6ad3dfa7f4fdc8bb049f31142df9e5c01e6d13 (diff)
downloadspark-f067acefabebf04939d03a639a2aaa654e1bc8f9.tar.gz
spark-f067acefabebf04939d03a639a2aaa654e1bc8f9.tar.bz2
spark-f067acefabebf04939d03a639a2aaa654e1bc8f9.zip
[SPARK-19155][ML] Make family case insensitive in GLM
## What changes were proposed in this pull request?

This is a supplement to PR #16516, which did not make the value returned by `getFamily` case insensitive. The current tests of Poisson/binomial GLM with weights fail when the family is specified as 'Poisson' or 'Binomial', because the calculation of `dispersion` and `pValues` compares the value retrieved from `getFamily` using a case-sensitive `==`:

```
model.getFamily == Binomial.name || model.getFamily == Poisson.name
```

## How was this patch tested?

Updated the existing tests to use 'Poisson' and 'Binomial'.

yanboliang felixcheung imatiach-msft

Author: actuaryzhang <actuaryzhang10@gmail.com>

Closes #16675 from actuaryzhang/family.
Diffstat (limited to 'mllib/src')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala | 6
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala | 4
2 files changed, 6 insertions, 4 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index 116f0f6507..3ffed39898 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -1044,7 +1044,8 @@ class GeneralizedLinearRegressionSummary private[regression] (
*/
@Since("2.0.0")
lazy val dispersion: Double = if (
- model.getFamily == Binomial.name || model.getFamily == Poisson.name) {
+ model.getFamily.toLowerCase == Binomial.name ||
+ model.getFamily.toLowerCase == Poisson.name) {
1.0
} else {
val rss = pearsonResiduals.agg(sum(pow(col("pearsonResiduals"), 2.0))).first().getDouble(0)
@@ -1147,7 +1148,8 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] (
@Since("2.0.0")
lazy val pValues: Array[Double] = {
if (isNormalSolver) {
- if (model.getFamily == Binomial.name || model.getFamily == Poisson.name) {
+ if (model.getFamily.toLowerCase == Binomial.name ||
+ model.getFamily.toLowerCase == Poisson.name) {
tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 1.0).cdf(math.abs(x))) }
} else {
tValues.map { x =>
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
index 9f3d643c2b..e3c278777c 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
@@ -758,7 +758,7 @@ class GeneralizedLinearRegressionSuite
0.028480 0.069123 0.935495 -0.049613
*/
val trainer = new GeneralizedLinearRegression()
- .setFamily("binomial")
+ .setFamily("Binomial")
.setWeightCol("weight")
.setFitIntercept(false)
@@ -875,7 +875,7 @@ class GeneralizedLinearRegressionSuite
-0.4378554 0.2189277 0.1459518 -0.1094638
*/
val trainer = new GeneralizedLinearRegression()
- .setFamily("poisson")
+ .setFamily("Poisson")
.setWeightCol("weight")
.setFitIntercept(true)