aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test/scala/org
diff options
context:
space:
mode:
author vectorijk <jiangkai@gmail.com> 2015-11-02 16:12:04 -0800
committer DB Tsai <dbt@netflix.com> 2015-11-02 16:12:04 -0800
commit c020f7d9d43548d27ae4a9564ba38981fd530cb1 (patch)
tree 8dc46ed1b48d88852323747b2d86aedd1c770b64 /mllib/src/test/scala/org
parent ec03866a7ef2d0826520755d47c8c9480148a76c (diff)
download spark-c020f7d9d43548d27ae4a9564ba38981fd530cb1.tar.gz
spark-c020f7d9d43548d27ae4a9564ba38981fd530cb1.tar.bz2
spark-c020f7d9d43548d27ae4a9564ba38981fd530cb1.zip
[SPARK-10592] [ML] [PySpark] Deprecate weights and use coefficients instead in ML models
Deprecates `weights` in favor of `coefficients` in `LogisticRegression` and `LinearRegression`. Author: vectorijk <jiangkai@gmail.com>. Closes #9311 from vectorijk/spark-10592.
Diffstat (limited to 'mllib/src/test/scala/org')
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala 152
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala 6
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala 6
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala 12
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala 184
5 files changed, 186 insertions, 174 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index e0a795e5e0..325faf37e8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -48,21 +48,22 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
import org.apache.spark.mllib.classification.LogisticRegressionSuite
val nPoints = 10000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
val data = sc.parallelize(LogisticRegressionSuite.generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 1)
+ coefficients, xMean, xVariance, true, nPoints, 42), 1)
data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1) + ", "
+ x.features(2) + ", " + x.features(3)).saveAsTextFile("path")
*/
binaryDataset = {
val nPoints = 10000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
- val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ val testData =
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
sqlContext.createDataFrame(sc.parallelize(testData, 4))
}
@@ -296,8 +297,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -308,14 +309,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7996864
*/
val interceptR = 2.8366423
- val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
+ val coefficientsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
// Without regularization, with or without standardization will converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
}
test("binary logistic regression without intercept without regularization") {
@@ -332,9 +333,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights =
+ coefficients =
coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0, intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -345,14 +346,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7407946
*/
val interceptR = 0.0
- val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
+ val coefficientsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-2)
// Without regularization, with or without standardization should converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-2)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-2)
}
test("binary logistic regression with intercept with L1 regularization") {
@@ -371,8 +372,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -383,10 +384,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.02481551
*/
val interceptR1 = -0.05627428
- val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
+ val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
assert(model1.intercept ~== interceptR1 relTol 1E-2)
- assert(model1.weights ~= weightsR1 absTol 2E-2)
+ assert(model1.coefficients ~= coefficientsR1 absTol 2E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -395,9 +396,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -408,10 +409,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.3722152
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
assert(model2.intercept ~== interceptR2 relTol 1E-2)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with L1 regularization") {
@@ -430,9 +431,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -443,10 +444,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.03891782
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
+ val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 absTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 absTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -455,9 +456,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -468,10 +469,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with L2 regularization") {
@@ -490,8 +491,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -502,10 +503,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.10062872
*/
val interceptR1 = 0.15021751
- val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
+ val coefficientsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -514,9 +515,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -527,10 +528,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.06266838
*/
val interceptR2 = 0.48657516
- val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
+ val coefficientsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
}
test("binary logistic regression without intercept with L2 regularization") {
@@ -549,9 +550,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -562,10 +563,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.09799775
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
+ val coefficientsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -574,9 +575,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -587,10 +588,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.053314311
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
+ val coefficientsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-2)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)
}
test("binary logistic regression with intercept with ElasticNet regularization") {
@@ -609,8 +610,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -621,10 +622,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.15458796
*/
val interceptR1 = 0.57734851
- val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
+ val coefficientsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
assert(model1.intercept ~== interceptR1 relTol 6E-3)
- assert(model1.weights ~== weightsR1 absTol 5E-3)
+ assert(model1.coefficients ~== coefficientsR1 absTol 5E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -633,9 +634,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -646,10 +647,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.05350074
*/
val interceptR2 = 0.51555993
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
assert(model2.intercept ~== interceptR2 relTol 6E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with ElasticNet regularization") {
@@ -668,9 +669,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -681,10 +682,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.142534158
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
+ val coefficientsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 absTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR1 absTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -693,9 +694,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -706,10 +707,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with strong L1 regularization") {
@@ -732,8 +733,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}).histogram
/*
- For binary logistic regression with strong L1 regularization, all the weights will be zeros.
- As a result,
+ For binary logistic regression with strong L1 regularization, all the coefficients
+ will be zeros. As a result,
{{{
P(0) = 1 / (1 + \exp(b)), and
P(1) = \exp(b) / (1 + \exp(b))
@@ -743,13 +744,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}}}
*/
val interceptTheory = math.log(histogram(1) / histogram(0))
- val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+ val coefficientsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptTheory relTol 1E-5)
- assert(model1.weights ~= weightsTheory absTol 1E-6)
+ assert(model1.coefficients ~= coefficientsTheory absTol 1E-6)
assert(model2.intercept ~== interceptTheory relTol 1E-5)
- assert(model2.weights ~= weightsTheory absTol 1E-6)
+ assert(model2.coefficients ~= coefficientsTheory absTol 1E-6)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -758,8 +759,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -770,10 +771,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR = -0.248065
- val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+ val coefficientsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptR relTol 1E-5)
- assert(model1.weights ~== weightsR absTol 1E-6)
+ assert(model1.coefficients ~== coefficientsR absTol 1E-6)
}
test("evaluate on test set") {
@@ -814,10 +815,11 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
test("binary logistic regression with weighted samples") {
val (dataset, weightedDataset) = {
val nPoints = 1000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
- val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ val testData =
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
// Let's over-sample the positive samples twice.
val data1 = testData.flatMap { case labeledPoint: LabeledPoint =>
@@ -863,9 +865,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model1a0 = trainer1a.fit(dataset)
val model1a1 = trainer1a.fit(weightedDataset)
val model1b = trainer1b.fit(weightedDataset)
- assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+ assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
- assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+ assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
index 2d1df9b2b8..17db8c4477 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
@@ -53,16 +53,16 @@ class MultilayerPerceptronClassifierSuite extends SparkFunSuite with MLlibTestSp
test("3 class classification with 2 hidden layers") {
val nPoints = 1000
- // The following weights are taken from OneVsRestSuite.scala
+ // The following coefficients are taken from OneVsRestSuite.scala
// they represent 3-class iris dataset
- val weights = Array(
+ val coefficients = Array(
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
val rdd = sc.parallelize(generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 2)
+ coefficients, xMean, xVariance, true, nPoints, 42), 2)
val dataFrame = sqlContext.createDataFrame(rdd).toDF("label", "features")
val numClasses = 3
val numIterations = 100
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 977f0e0b70..5ea71c5317 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -43,16 +43,16 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext {
val nPoints = 1000
- // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2.
+ // The following coefficients and xMean/xVariance are computed from iris dataset with lambda=0.2
// As a result, we are drawing samples from probability distribution of an actual model.
- val weights = Array(
+ val coefficients = Array(
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
rdd = sc.parallelize(generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 2)
+ coefficients, xMean, xVariance, true, nPoints, 42), 2)
dataset = sqlContext.createDataFrame(rdd)
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
index 359f310271..c0f791bce1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
@@ -141,12 +141,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 5
n= 1000
*/
- val coefficientsR = Vectors.dense(-0.039)
+ val regressionCoefficientsR = Vectors.dense(-0.039)
val interceptR = 1.759
val scaleR = 1.41
assert(model.intercept ~== interceptR relTol 1E-3)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
@@ -212,12 +212,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 5
n= 1000
*/
- val coefficientsR = Vectors.dense(-0.0844, 0.0677)
+ val regressionCoefficientsR = Vectors.dense(-0.0844, 0.0677)
val interceptR = 1.9206
val scaleR = 0.977
assert(model.intercept ~== interceptR relTol 1E-3)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
@@ -282,12 +282,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 6
n= 1000
*/
- val coefficientsR = Vectors.dense(0.896, -0.709)
+ val regressionCoefficientsR = Vectors.dense(0.896, -0.709)
val interceptR = 0.0
val scaleR = 1.52
assert(model.intercept === interceptR)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index a2a5c0bbdc..235c796d78 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -122,8 +122,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
label <- as.numeric(data$V1)
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.298698
@@ -131,17 +131,18 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 7.199082
*/
val interceptR = 6.298698
- val weightsR = Vectors.dense(4.700706, 7.199082)
+ val coefficientsR = Vectors.dense(4.700706, 7.199082)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -159,37 +160,37 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val modelWithoutIntercept2 = trainer2.fit(datasetWithDenseFeatureWithoutIntercept)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
intercept = FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data.V2. 6.995908
as.numeric.data.V3. 5.275131
*/
- val weightsR = Vectors.dense(6.995908, 5.275131)
+ val coefficientsR = Vectors.dense(6.995908, 5.275131)
assert(model1.intercept ~== 0 absTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
assert(model2.intercept ~== 0 absTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
/*
Then again with the data with no intercept:
- > weightsWithoutIntercept
+ > coefficientsWithoutIntercept
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data3.V2. 4.70011
as.numeric.data3.V3. 7.19943
*/
- val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
+ val coefficientsWithourInterceptR = Vectors.dense(4.70011, 7.19943)
assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
- assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR relTol 1E-3)
+ assert(modelWithoutIntercept1.coefficients ~= coefficientsWithourInterceptR relTol 1E-3)
assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
- assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR relTol 1E-3)
+ assert(modelWithoutIntercept2.coefficients ~= coefficientsWithourInterceptR relTol 1E-3)
}
}
@@ -211,8 +212,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian",
+ alpha = 1.0, lambda = 0.57 ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.24300
@@ -220,14 +222,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.679841
*/
val interceptR1 = 6.24300
- val weightsR1 = Vectors.dense(4.024821, 6.679841)
+ val coefficientsR1 = Vectors.dense(4.024821, 6.679841)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.416948
@@ -235,16 +237,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.724286
*/
val interceptR2 = 6.416948
- val weightsR2 = Vectors.dense(3.893869, 6.724286)
+ val coefficientsR2 = Vectors.dense(3.893869, 6.724286)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -269,9 +272,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- intercept=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, intercept=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -279,15 +282,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.772913
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(6.299752, 4.772913)
+ val coefficientsR1 = Vectors.dense(6.299752, 4.772913)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- intercept=FALSE, standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, intercept=FALSE, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -295,16 +298,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.764229
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(6.232193, 4.764229)
+ val coefficientsR2 = Vectors.dense(6.232193, 4.764229)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -321,8 +325,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 5.269376
@@ -330,15 +334,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.712356
*/
val interceptR1 = 5.269376
- val weightsR1 = Vectors.dense(3.736216, 5.712356)
+ val coefficientsR1 = Vectors.dense(3.736216, 5.712356)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 5.791109
@@ -346,15 +350,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.910406
*/
val interceptR2 = 5.791109
- val weightsR2 = Vectors.dense(3.435466, 5.910406)
+ val coefficientsR2 = Vectors.dense(3.435466, 5.910406)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -370,9 +375,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
intercept = FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -380,15 +385,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.214502
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(5.522875, 4.214502)
+ val coefficientsR1 = Vectors.dense(5.522875, 4.214502)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
intercept = FALSE, standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -396,15 +401,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.187419
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(5.263704, 4.187419)
+ val coefficientsR2 = Vectors.dense(5.263704, 4.187419)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -428,8 +434,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6 ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.324108
@@ -437,15 +444,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.200403
*/
val interceptR1 = 5.696056
- val weightsR1 = Vectors.dense(3.670489, 6.001122)
+ val coefficientsR1 = Vectors.dense(3.670489, 6.001122)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.114723
@@ -453,16 +460,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.146531
*/
val interceptR2 = 6.114723
- val weightsR2 = Vectors.dense(3.409937, 6.146531)
+ val coefficientsR2 = Vectors.dense(3.409937, 6.146531)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -487,9 +495,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
- intercept=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6, intercept=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -497,15 +505,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.322251
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(5.673348, 4.322251)
+ val coefficientsR1 = Vectors.dense(5.673348, 4.322251)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
- intercept=FALSE, standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6, intercept=FALSE, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -513,16 +521,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.297622
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(5.477988, 4.297622)
+ val coefficientsR2 = Vectors.dense(5.477988, 4.297622)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -554,7 +563,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val expectedResiduals = datasetWithDenseFeature.select("features", "label")
.map { case Row(features: DenseVector, label: Double) =>
val prediction =
- features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept
+ features(0) * model.coefficients(0) + features(1) * model.coefficients(1) +
+ model.intercept
label - prediction
}
.zip(model.summary.residuals.map(_.getDouble(0)))
@@ -663,9 +673,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model1a1 = trainer1a.fit(weightedData)
val model1b = trainer1b.fit(weightedData)
- assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+ assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
- assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+ assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
val trainer2a = (new LinearRegression).setFitIntercept(true)
@@ -675,9 +685,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2a0 = trainer2a.fit(data)
val model2a1 = trainer2a.fit(weightedData)
val model2b = trainer2b.fit(weightedData)
- assert(model2a0.weights !~= model2a1.weights absTol 1E-3)
+ assert(model2a0.coefficients !~= model2a1.coefficients absTol 1E-3)
assert(model2a0.intercept !~= model2a1.intercept absTol 1E-3)
- assert(model2a0.weights ~== model2b.weights absTol 1E-3)
+ assert(model2a0.coefficients ~== model2b.coefficients absTol 1E-3)
assert(model2a0.intercept ~== model2b.intercept absTol 1E-3)
val trainer3a = (new LinearRegression).setFitIntercept(false)
@@ -687,8 +697,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model3a0 = trainer3a.fit(data)
val model3a1 = trainer3a.fit(weightedData)
val model3b = trainer3b.fit(weightedData)
- assert(model3a0.weights !~= model3a1.weights absTol 1E-3)
- assert(model3a0.weights ~== model3b.weights absTol 1E-3)
+ assert(model3a0.coefficients !~= model3a1.coefficients absTol 1E-3)
+ assert(model3a0.coefficients ~== model3b.coefficients absTol 1E-3)
val trainer4a = (new LinearRegression).setFitIntercept(false)
.setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
@@ -697,8 +707,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model4a0 = trainer4a.fit(data)
val model4a1 = trainer4a.fit(weightedData)
val model4b = trainer4b.fit(weightedData)
- assert(model4a0.weights !~= model4a1.weights absTol 1E-3)
- assert(model4a0.weights ~== model4b.weights absTol 1E-3)
+ assert(model4a0.coefficients !~= model4a1.coefficients absTol 1E-3)
+ assert(model4a0.coefficients ~== model4b.coefficients absTol 1E-3)
}
}