aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author Holden Karau <holden@pigscanfly.ca> 2015-07-09 19:08:33 -0700
committer DB Tsai <dbt@netflix.com> 2015-07-09 19:08:33 -0700
commit 2727304660663fcf1e41f7b666978c1443262e4e (patch)
tree 8101c75e6b2da8484509f82c4a5f0fe898b4a3d3 /mllib
parent 0e78e40c0b221e09d9ea22177a99f3bea4b2b4fb (diff)
download spark-2727304660663fcf1e41f7b666978c1443262e4e.tar.gz
spark-2727304660663fcf1e41f7b666978c1443262e4e.tar.bz2
spark-2727304660663fcf1e41f7b666978c1443262e4e.zip
[SPARK-8913] [ML] Simplify LogisticRegression suite to use Vector <-> Vector comparison
Clean up tests from SPARK-8700. Author: Holden Karau <holden@pigscanfly.ca>. Closes #7335 from holdenk/SPARK-8913-cleanup-tests-from-SPARK-8700-logistic-regression-r2-really-logistic-regression-this-time and squashes the following commits: e5e2c5f [Holden Karau] Simplify LogisticRegression suite to use Vector <-> Vector comparisons instead of comparing element by element.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala 135
1 files changed, 39 insertions, 96 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 27253c1db2..b7dd447538 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -234,20 +234,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7996864
*/
val interceptR = 2.8366423
- val weightsR = Array(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
+ val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights(0) ~== weightsR(0) relTol 1E-3)
- assert(model1.weights(1) ~== weightsR(1) relTol 1E-3)
- assert(model1.weights(2) ~== weightsR(2) relTol 1E-3)
- assert(model1.weights(3) ~== weightsR(3) relTol 1E-3)
+ assert(model1.weights ~= weightsR relTol 1E-3)
// Without regularization, with or without standardization will converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights(0) ~== weightsR(0) relTol 1E-3)
- assert(model2.weights(1) ~== weightsR(1) relTol 1E-3)
- assert(model2.weights(2) ~== weightsR(2) relTol 1E-3)
- assert(model2.weights(3) ~== weightsR(3) relTol 1E-3)
+ assert(model2.weights ~= weightsR relTol 1E-3)
}
test("binary logistic regression without intercept without regularization") {
@@ -277,20 +271,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7407946
*/
val interceptR = 0.0
- val weightsR = Array(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
+ val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights(0) ~== weightsR(0) relTol 1E-2)
- assert(model1.weights(1) ~== weightsR(1) relTol 1E-2)
- assert(model1.weights(2) ~== weightsR(2) relTol 1E-3)
- assert(model1.weights(3) ~== weightsR(3) relTol 1E-3)
+ assert(model1.weights ~= weightsR relTol 1E-2)
// Without regularization, with or without standardization should converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights(0) ~== weightsR(0) relTol 1E-2)
- assert(model2.weights(1) ~== weightsR(1) relTol 1E-2)
- assert(model2.weights(2) ~== weightsR(2) relTol 1E-3)
- assert(model2.weights(3) ~== weightsR(3) relTol 1E-3)
+ assert(model2.weights ~= weightsR relTol 1E-2)
}
test("binary logistic regression with intercept with L1 regularization") {
@@ -321,13 +309,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.02481551
*/
val interceptR1 = -0.05627428
- val weightsR1 = Array(0.0, 0.0, -0.04325749, -0.02481551)
+ val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
assert(model1.intercept ~== interceptR1 relTol 1E-2)
- assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3)
- assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
- assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2)
- assert(model1.weights(3) ~== weightsR1(3) relTol 2E-2)
+ assert(model1.weights ~= weightsR1 absTol 2E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -349,13 +334,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.3722152
- val weightsR2 = Array(0.0, 0.0, -0.1665453, 0.0)
+ val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
assert(model2.intercept ~== interceptR2 relTol 1E-2)
- assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
- assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
- assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
- assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
+ assert(model2.weights ~= weightsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with L1 regularization") {
@@ -387,13 +369,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.03891782
*/
val interceptR1 = 0.0
- val weightsR1 = Array(0.0, 0.0, -0.05189203, -0.03891782)
+ val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3)
- assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
- assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2)
- assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2)
+ assert(model1.weights ~= weightsR1 absTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -415,13 +394,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Array(0.0, 0.0, -0.08420782, 0.0)
+ val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
- assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
- assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
- assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
- assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
+ assert(model2.intercept ~== interceptR2 absTol 1E-3)
+ assert(model2.weights ~= weightsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with L2 regularization") {
@@ -452,13 +428,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.10062872
*/
val interceptR1 = 0.15021751
- val weightsR1 = Array(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
+ val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights(0) ~== weightsR1(0) relTol 1E-3)
- assert(model1.weights(1) ~== weightsR1(1) relTol 1E-3)
- assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
- assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
+ assert(model1.weights ~= weightsR1 relTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -480,13 +453,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.06266838
*/
val interceptR2 = 0.48657516
- val weightsR2 = Array(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
+ val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights(0) ~== weightsR2(0) relTol 1E-3)
- assert(model2.weights(1) ~== weightsR2(1) relTol 1E-3)
- assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3)
- assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3)
+ assert(model2.weights ~= weightsR2 relTol 1E-3)
}
test("binary logistic regression without intercept with L2 regularization") {
@@ -518,13 +488,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.09799775
*/
val interceptR1 = 0.0
- val weightsR1 = Array(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
+ val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
- assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights(0) ~== weightsR1(0) relTol 1E-2)
- assert(model1.weights(1) ~== weightsR1(1) relTol 1E-2)
- assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
- assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
+ assert(model1.intercept ~== interceptR1 absTol 1E-3)
+ assert(model1.weights ~= weightsR1 relTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -546,13 +513,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.053314311
*/
val interceptR2 = 0.0
- val weightsR2 = Array(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
+ val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
- assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights(0) ~== weightsR2(0) relTol 1E-2)
- assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2)
- assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3)
- assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3)
+ assert(model2.intercept ~== interceptR2 absTol 1E-3)
+ assert(model2.weights ~= weightsR2 relTol 1E-2)
}
test("binary logistic regression with intercept with ElasticNet regularization") {
@@ -583,13 +547,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.15458796
*/
val interceptR1 = 0.57734851
- val weightsR1 = Array(-0.05310287, 0.0, -0.08849250, -0.15458796)
+ val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
assert(model1.intercept ~== interceptR1 relTol 6E-3)
- assert(model1.weights(0) ~== weightsR1(0) relTol 5E-3)
- assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
- assert(model1.weights(2) ~== weightsR1(2) relTol 5E-3)
- assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
+ assert(model1.weights ~== weightsR1 absTol 5E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -611,13 +572,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.05350074
*/
val interceptR2 = 0.51555993
- val weightsR2 = Array(0.0, 0.0, -0.18807395, -0.05350074)
+ val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
assert(model2.intercept ~== interceptR2 relTol 6E-3)
- assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
- assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
- assert(model2.weights(2) ~== weightsR2(2) relTol 5E-3)
- assert(model2.weights(3) ~== weightsR2(3) relTol 1E-2)
+ assert(model2.weights ~= weightsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with ElasticNet regularization") {
@@ -649,13 +607,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.142534158
*/
val interceptR1 = 0.0
- val weightsR1 = Array(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
+ val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights(0) ~== weightsR1(0) absTol 1E-2)
- assert(model1.weights(1) ~== weightsR1(1) absTol 1E-2)
- assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
- assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2)
+ assert(model1.weights ~= weightsR1 absTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -677,13 +632,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Array(0.0, 0.03345223, -0.11304532, 0.0)
+ val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
- assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
- assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2)
- assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
- assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
+ assert(model2.intercept ~== interceptR2 absTol 1E-3)
+ assert(model2.weights ~= weightsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with strong L1 regularization") {
@@ -717,19 +669,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}}}
*/
val interceptTheory = math.log(histogram(1).toDouble / histogram(0).toDouble)
- val weightsTheory = Array(0.0, 0.0, 0.0, 0.0)
+ val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptTheory relTol 1E-5)
- assert(model1.weights(0) ~== weightsTheory(0) absTol 1E-6)
- assert(model1.weights(1) ~== weightsTheory(1) absTol 1E-6)
- assert(model1.weights(2) ~== weightsTheory(2) absTol 1E-6)
- assert(model1.weights(3) ~== weightsTheory(3) absTol 1E-6)
+ assert(model1.weights ~= weightsTheory absTol 1E-6)
assert(model2.intercept ~== interceptTheory relTol 1E-5)
- assert(model2.weights(0) ~== weightsTheory(0) absTol 1E-6)
- assert(model2.weights(1) ~== weightsTheory(1) absTol 1E-6)
- assert(model2.weights(2) ~== weightsTheory(2) absTol 1E-6)
- assert(model2.weights(3) ~== weightsTheory(3) absTol 1E-6)
+ assert(model2.weights ~= weightsTheory absTol 1E-6)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -750,12 +696,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR = -0.248065
- val weightsR = Array(0.0, 0.0, 0.0, 0.0)
+ val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptR relTol 1E-5)
- assert(model1.weights(0) ~== weightsR(0) absTol 1E-6)
- assert(model1.weights(1) ~== weightsR(1) absTol 1E-6)
- assert(model1.weights(2) ~== weightsR(2) absTol 1E-6)
- assert(model1.weights(3) ~== weightsR(3) absTol 1E-6)
+ assert(model1.weights ~= weightsR absTol 1E-6)
}
}