authorvectorijk <jiangkai@gmail.com>2015-11-02 16:12:04 -0800
committerDB Tsai <dbt@netflix.com>2015-11-02 16:12:04 -0800
commitc020f7d9d43548d27ae4a9564ba38981fd530cb1 (patch)
tree8dc46ed1b48d88852323747b2d86aedd1c770b64 /mllib
parentec03866a7ef2d0826520755d47c8c9480148a76c (diff)
[SPARK-10592] [ML] [PySpark] Deprecate weights and use coefficients instead in ML models
Deprecated `weights` in favor of `coefficients` in `LogisticRegression` and `LinearRegression`.

Author: vectorijk <jiangkai@gmail.com>

Closes #9311 from vectorijk/spark-10592.
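The change follows one pattern throughout: rename the stored vector to `coefficients` and keep `weights` as a deprecated forwarder. A minimal sketch of that pattern (`ExampleModel` is hypothetical, not a class in this patch):

```scala
import org.apache.spark.mllib.linalg.Vector

// Hypothetical model illustrating the pattern applied below: the public
// field is renamed to `coefficients`, and `weights` is kept as a
// deprecated alias so existing callers still compile, with a warning.
class ExampleModel(val coefficients: Vector) {
  @deprecated("Use coefficients instead.", "1.6.0")
  def weights: Vector = coefficients
}
```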
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala              |  11
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala                               |  15
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala               |  32
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala                  |   4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala                    |  15
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java                |   6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala         | 152
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala |   6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala                  |   6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala          |  12
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala               | 184
11 files changed, 235 insertions(+), 208 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 6f839ff4d7..a1335e7a1b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -392,11 +392,14 @@ class LogisticRegression(override val uid: String)
@Experimental
class LogisticRegressionModel private[ml] (
override val uid: String,
- val weights: Vector,
+ val coefficients: Vector,
val intercept: Double)
extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel]
with LogisticRegressionParams {
+ @deprecated("Use coefficients instead.", "1.6.0")
+ def weights: Vector = coefficients
+
override def setThreshold(value: Double): this.type = super.setThreshold(value)
override def getThreshold: Double = super.getThreshold
@@ -407,7 +410,7 @@ class LogisticRegressionModel private[ml] (
/** Margin (rawPrediction) for class label 1. For binary classification only. */
private val margin: Vector => Double = (features) => {
- BLAS.dot(features, weights) + intercept
+ BLAS.dot(features, coefficients) + intercept
}
/** Score (probability) for class label 1. For binary classification only. */
@@ -416,7 +419,7 @@ class LogisticRegressionModel private[ml] (
1.0 / (1.0 + math.exp(-m))
}
- override val numFeatures: Int = weights.size
+ override val numFeatures: Int = coefficients.size
override val numClasses: Int = 2
@@ -483,7 +486,7 @@ class LogisticRegressionModel private[ml] (
}
override def copy(extra: ParamMap): LogisticRegressionModel = {
- val newModel = copyValues(new LogisticRegressionModel(uid, weights, intercept), extra)
+ val newModel = copyValues(new LogisticRegressionModel(uid, coefficients, intercept), extra)
if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
newModel.setParent(parent)
}
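A hedged usage sketch of the renamed accessor; `training` (a DataFrame with "label" and "features" columns) is an assumption, not part of this patch:

```scala
import org.apache.spark.ml.classification.LogisticRegression

val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.01)
val model = lr.fit(training)  // `training` assumed to exist

println(model.coefficients)   // preferred name as of 1.6.0
println(model.weights)        // still compiles, with a deprecation warning
```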
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
index 21ebf6d916..9162ec0e4e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
@@ -51,13 +51,22 @@ private[r] object SparkRWrappers {
pipeline.fit(df)
}
+ @deprecated("Use getModelCoefficients instead.", "1.6.0")
def getModelWeights(model: PipelineModel): Array[Double] = {
model.stages.last match {
case m: LinearRegressionModel =>
Array(m.intercept) ++ m.weights.toArray
- case _: LogisticRegressionModel =>
- throw new UnsupportedOperationException(
- "No weights available for LogisticRegressionModel") // SPARK-9492
+ case m: LogisticRegressionModel =>
+ Array(m.intercept) ++ m.weights.toArray
+ }
+ }
+
+ def getModelCoefficients(model: PipelineModel): Array[Double] = {
+ model.stages.last match {
+ case m: LinearRegressionModel =>
+ Array(m.intercept) ++ m.coefficients.toArray
+ case m: LogisticRegressionModel =>
+ Array(m.intercept) ++ m.coefficients.toArray
}
}
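Both branches now return the same flat layout: the intercept first, then the feature coefficients. A sketch of unpacking that array (the numbers are borrowed from the binary logistic regression test below; `SparkRWrappers` itself is `private[r]`, so this only illustrates the layout):

```scala
// Array(intercept, coef_1, ..., coef_n), as built by getModelCoefficients.
val values = Array(2.8366423, -0.5895848, 0.8931147, -0.3925051, -0.7996864)
val intercept = values.head       // 2.8366423
val coefficients = values.tail    // the four feature coefficients
```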
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index ac2c3d825f..4dbbc7d399 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -200,17 +200,17 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
val numFeatures = dataset.select($(featuresCol)).take(1)(0).getAs[Vector](0).size
/*
- The weights vector has three parts:
+ The coefficients vector has three parts:
the first element: Double, log(sigma), the log of scale parameter
the second element: Double, intercept of the beta parameter
the third to the end elements: Doubles, regression coefficients vector of the beta parameter
*/
- val initialWeights = Vectors.zeros(numFeatures + 2)
+ val initialCoefficients = Vectors.zeros(numFeatures + 2)
val states = optimizer.iterations(new CachedDiffFunction(costFun),
- initialWeights.toBreeze.toDenseVector)
+ initialCoefficients.toBreeze.toDenseVector)
- val weights = {
+ val coefficients = {
val arrayBuilder = mutable.ArrayBuilder.make[Double]
var state: optimizer.State = null
while (states.hasNext) {
@@ -227,10 +227,10 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
if (handlePersistence) instances.unpersist()
- val coefficients = Vectors.dense(weights.slice(2, weights.length))
- val intercept = weights(1)
- val scale = math.exp(weights(0))
- val model = new AFTSurvivalRegressionModel(uid, coefficients, intercept, scale)
+ val regressionCoefficients = Vectors.dense(coefficients.slice(2, coefficients.length))
+ val intercept = coefficients(1)
+ val scale = math.exp(coefficients(0))
+ val model = new AFTSurvivalRegressionModel(uid, regressionCoefficients, intercept, scale)
copyValues(model.setParent(this))
}
@@ -251,7 +251,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
@Since("1.6.0")
class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0") override val uid: String,
- @Since("1.6.0") val coefficients: Vector,
+ @Since("1.6.0") val regressionCoefficients: Vector,
@Since("1.6.0") val intercept: Double,
@Since("1.6.0") val scale: Double)
extends Model[AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams {
@@ -275,7 +275,7 @@ class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0")
def predictQuantiles(features: Vector): Vector = {
// scale parameter for the Weibull distribution of lifetime
- val lambda = math.exp(BLAS.dot(coefficients, features) + intercept)
+ val lambda = math.exp(BLAS.dot(regressionCoefficients, features) + intercept)
// shape parameter for the Weibull distribution of lifetime
val k = 1 / scale
val quantiles = $(quantileProbabilities).map {
@@ -286,7 +286,7 @@ class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0")
def predict(features: Vector): Double = {
- math.exp(BLAS.dot(coefficients, features) + intercept)
+ math.exp(BLAS.dot(regressionCoefficients, features) + intercept)
}
@Since("1.6.0")
@@ -309,7 +309,7 @@ class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0")
override def copy(extra: ParamMap): AFTSurvivalRegressionModel = {
- copyValues(new AFTSurvivalRegressionModel(uid, coefficients, intercept, scale), extra)
+ copyValues(new AFTSurvivalRegressionModel(uid, regressionCoefficients, intercept, scale), extra)
.setParent(parent)
}
}
@@ -369,17 +369,17 @@ class AFTSurvivalRegressionModel private[ml] (
* \frac{\partial (-\iota)}{\partial (\log\sigma)}=
* \sum_{i=1}^{n}[\delta_{i}+(\delta_{i}-e^{\epsilon_{i}})\epsilon_{i}]
* }}}
- * @param weights The log of scale parameter, the intercept and
+ * @param coefficients In three parts: the log of the scale parameter, the intercept, and
* regression coefficients corresponding to the features.
* @param fitIntercept Whether to fit an intercept term.
*/
-private class AFTAggregator(weights: BDV[Double], fitIntercept: Boolean)
+private class AFTAggregator(coefficients: BDV[Double], fitIntercept: Boolean)
extends Serializable {
// beta is the intercept and regression coefficients to the covariates
- private val beta = weights.slice(1, weights.length)
+ private val beta = coefficients.slice(1, coefficients.length)
// sigma is the scale parameter of the AFT model
- private val sigma = math.exp(weights(0))
+ private val sigma = math.exp(coefficients(0))
private var totalCnt: Long = 0L
private var lossSum = 0.0
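A standalone sketch of the packed layout described above and its decomposition in train(); the numeric values are placeholders:

```scala
import org.apache.spark.mllib.linalg.Vectors

// coefficients = [log(sigma), intercept, beta_1, ..., beta_k]
val packed = Vectors.dense(0.35, 1.9, -0.08, 0.07)  // placeholder values

val scale = math.exp(packed(0))  // sigma, the Weibull scale parameter
val intercept = packed(1)
val regressionCoefficients =
  Vectors.dense(packed.toArray.slice(2, packed.size))

// predictQuantiles then uses a Weibull lifetime with k = 1 / scale and
// lambda = exp(dot(regressionCoefficients, features) + intercept);
// the p-quantile is lambda * (-ln(1 - p))^(1 / k).
```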
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index 2ff500f291..f4a17c8f9a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -87,8 +87,8 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
lit(1.0)
}
dataset.select(col($(labelCol)), f, w)
- .map { case Row(label: Double, feature: Double, weights: Double) =>
- (label, feature, weights)
+ .map { case Row(label: Double, feature: Double, weight: Double) =>
+ (label, feature, weight)
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index f663b9bd9a..6e9c7442b8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -203,7 +203,7 @@ class LinearRegression(override val uid: String)
val yMean = ySummarizer.mean(0)
val yStd = math.sqrt(ySummarizer.variance(0))
- // If the yStd is zero, then the intercept is yMean with zero weights;
+ // If the yStd is zero, then the intercept is yMean with zero coefficient;
// as a result, training is not needed.
if (yStd == 0.0) {
logWarning(s"The standard deviation of the label is zero, so the coefficients will be " +
@@ -331,14 +331,17 @@ class LinearRegression(override val uid: String)
@Experimental
class LinearRegressionModel private[ml] (
override val uid: String,
- val weights: Vector,
+ val coefficients: Vector,
val intercept: Double)
extends RegressionModel[Vector, LinearRegressionModel]
with LinearRegressionParams {
private var trainingSummary: Option[LinearRegressionTrainingSummary] = None
- override val numFeatures: Int = weights.size
+ @deprecated("Use coefficients instead.", "1.6.0")
+ def weights: Vector = coefficients
+
+ override val numFeatures: Int = coefficients.size
/**
* Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is
@@ -387,11 +390,11 @@ class LinearRegressionModel private[ml] (
override protected def predict(features: Vector): Double = {
- dot(features, weights) + intercept
+ dot(features, coefficients) + intercept
}
override def copy(extra: ParamMap): LinearRegressionModel = {
- val newModel = copyValues(new LinearRegressionModel(uid, weights, intercept), extra)
+ val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra)
if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
newModel.setParent(parent)
}
@@ -400,7 +403,7 @@ class LinearRegressionModel private[ml] (
/**
* :: Experimental ::
* Linear regression training results. Currently, the training summary ignores the
- * training weights except for the objective trace.
+ * training coefficients except for the objective trace.
* @param predictions predictions outputted by the model's `transform` method.
* @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
*/
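A sketch of the prediction arithmetic with the renamed field, mirroring `predict` above; the intercept and coefficients come from the glmnet fit quoted in LinearRegressionSuite, while the feature vector is an assumption:

```scala
import org.apache.spark.mllib.linalg.Vectors

val coefficients = Vectors.dense(4.700706, 7.199082)  // from the R fit in the tests
val intercept = 6.298698
val features = Vectors.dense(1.0, 2.0)                // assumed input

// predict(features) = dot(features, coefficients) + intercept
val prediction =
  features(0) * coefficients(0) + features(1) * coefficients(1) + intercept
```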
diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
index 253cabf013..cbabafe1b5 100644
--- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
@@ -47,16 +47,16 @@ public class JavaOneVsRestSuite implements Serializable {
jsql = new SQLContext(jsc);
int nPoints = 3;
- // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2.
+ // The following coefficients and xMean/xVariance are computed from iris dataset with lambda=0.2.
// As a result, we are drawing samples from probability distribution of an actual model.
- double[] weights = {
+ double[] coefficients = {
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682 };
double[] xMean = {5.843, 3.057, 3.758, 1.199};
double[] xVariance = {0.6856, 0.1899, 3.116, 0.581};
List<LabeledPoint> points = JavaConverters.seqAsJavaListConverter(
- generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
).asJava();
datasetRDD = jsc.parallelize(points, 2);
dataset = jsql.createDataFrame(datasetRDD, LabeledPoint.class);
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index e0a795e5e0..325faf37e8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -48,21 +48,22 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
import org.apache.spark.mllib.classification.LogisticRegressionSuite
val nPoints = 10000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
val data = sc.parallelize(LogisticRegressionSuite.generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 1)
+ coefficients, xMean, xVariance, true, nPoints, 42), 1)
data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1) + ", "
+ x.features(2) + ", " + x.features(3)).saveAsTextFile("path")
*/
binaryDataset = {
val nPoints = 10000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
- val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ val testData =
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
sqlContext.createDataFrame(sc.parallelize(testData, 4))
}
@@ -296,8 +297,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -308,14 +309,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7996864
*/
val interceptR = 2.8366423
- val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
+ val coefficientsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
// Without regularization, with or without standardization will converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
}
test("binary logistic regression without intercept without regularization") {
@@ -332,9 +333,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights =
+ coefficients =
coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0, intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -345,14 +346,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7407946
*/
val interceptR = 0.0
- val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
+ val coefficientsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-2)
// Without regularization, with or without standardization should converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-2)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-2)
}
test("binary logistic regression with intercept with L1 regularization") {
@@ -371,8 +372,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -383,10 +384,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.02481551
*/
val interceptR1 = -0.05627428
- val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
+ val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
assert(model1.intercept ~== interceptR1 relTol 1E-2)
- assert(model1.weights ~= weightsR1 absTol 2E-2)
+ assert(model1.coefficients ~= coefficientsR1 absTol 2E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -395,9 +396,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -408,10 +409,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.3722152
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
assert(model2.intercept ~== interceptR2 relTol 1E-2)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with L1 regularization") {
@@ -430,9 +431,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -443,10 +444,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.03891782
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
+ val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 absTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 absTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -455,9 +456,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -468,10 +469,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with L2 regularization") {
@@ -490,8 +491,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -502,10 +503,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.10062872
*/
val interceptR1 = 0.15021751
- val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
+ val coefficientsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -514,9 +515,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -527,10 +528,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.06266838
*/
val interceptR2 = 0.48657516
- val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
+ val coefficientsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
}
test("binary logistic regression without intercept with L2 regularization") {
@@ -549,9 +550,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -562,10 +563,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.09799775
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
+ val coefficientsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -574,9 +575,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -587,10 +588,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.053314311
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
+ val coefficientsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-2)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)
}
test("binary logistic regression with intercept with ElasticNet regularization") {
@@ -609,8 +610,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -621,10 +622,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.15458796
*/
val interceptR1 = 0.57734851
- val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
+ val coefficientsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
assert(model1.intercept ~== interceptR1 relTol 6E-3)
- assert(model1.weights ~== weightsR1 absTol 5E-3)
+ assert(model1.coefficients ~== coefficientsR1 absTol 5E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -633,9 +634,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -646,10 +647,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.05350074
*/
val interceptR2 = 0.51555993
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
assert(model2.intercept ~== interceptR2 relTol 6E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with ElasticNet regularization") {
@@ -668,9 +669,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -681,10 +682,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.142534158
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
+ val coefficientsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 absTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR1 absTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -693,9 +694,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -706,10 +707,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with strong L1 regularization") {
@@ -732,8 +733,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}).histogram
/*
- For binary logistic regression with strong L1 regularization, all the weights will be zeros.
- As a result,
+ For binary logistic regression with strong L1 regularization, all the coefficients
+ will be zeros. As a result,
{{{
P(0) = 1 / (1 + \exp(b)), and
P(1) = \exp(b) / (1 + \exp(b))
@@ -743,13 +744,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}}}
*/
val interceptTheory = math.log(histogram(1) / histogram(0))
- val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+ val coefficientsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptTheory relTol 1E-5)
- assert(model1.weights ~= weightsTheory absTol 1E-6)
+ assert(model1.coefficients ~= coefficientsTheory absTol 1E-6)
assert(model2.intercept ~== interceptTheory relTol 1E-5)
- assert(model2.weights ~= weightsTheory absTol 1E-6)
+ assert(model2.coefficients ~= coefficientsTheory absTol 1E-6)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -758,8 +759,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -770,10 +771,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR = -0.248065
- val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+ val coefficientsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptR relTol 1E-5)
- assert(model1.weights ~== weightsR absTol 1E-6)
+ assert(model1.coefficients ~== coefficientsR absTol 1E-6)
}
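The theoretical intercept used in this test is simply the empirical log-odds of the labels; a minimal sketch with placeholder counts:

```scala
import org.apache.spark.mllib.linalg.Vectors

// Strong L1 drives every coefficient to zero, so P(1)/P(0) = exp(b)
// and the fitted intercept is the log of the label ratio.
val histogram = Array(600.0, 400.0)  // placeholder counts of labels 0 and 1
val interceptTheory = math.log(histogram(1) / histogram(0))
val coefficientsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
```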
test("evaluate on test set") {
@@ -814,10 +815,11 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
test("binary logistic regression with weighted samples") {
val (dataset, weightedDataset) = {
val nPoints = 1000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
- val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ val testData =
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
// Let's over-sample the positive samples twice.
val data1 = testData.flatMap { case labeledPoint: LabeledPoint =>
@@ -863,9 +865,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model1a0 = trainer1a.fit(dataset)
val model1a1 = trainer1a.fit(weightedDataset)
val model1b = trainer1b.fit(weightedDataset)
- assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+ assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
- assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+ assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
index 2d1df9b2b8..17db8c4477 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
@@ -53,16 +53,16 @@ class MultilayerPerceptronClassifierSuite extends SparkFunSuite with MLlibTestSp
test("3 class classification with 2 hidden layers") {
val nPoints = 1000
- // The following weights are taken from OneVsRestSuite.scala
+ // The following coefficients are taken from OneVsRestSuite.scala
// they represent 3-class iris dataset
- val weights = Array(
+ val coefficients = Array(
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
val rdd = sc.parallelize(generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 2)
+ coefficients, xMean, xVariance, true, nPoints, 42), 2)
val dataFrame = sqlContext.createDataFrame(rdd).toDF("label", "features")
val numClasses = 3
val numIterations = 100
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 977f0e0b70..5ea71c5317 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -43,16 +43,16 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext {
val nPoints = 1000
- // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2.
+ // The following coefficients and xMean/xVariance are computed from iris dataset with lambda=0.2
// As a result, we are drawing samples from probability distribution of an actual model.
- val weights = Array(
+ val coefficients = Array(
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
rdd = sc.parallelize(generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 2)
+ coefficients, xMean, xVariance, true, nPoints, 42), 2)
dataset = sqlContext.createDataFrame(rdd)
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
index 359f310271..c0f791bce1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
@@ -141,12 +141,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 5
n= 1000
*/
- val coefficientsR = Vectors.dense(-0.039)
+ val regressionCoefficientsR = Vectors.dense(-0.039)
val interceptR = 1.759
val scaleR = 1.41
assert(model.intercept ~== interceptR relTol 1E-3)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
@@ -212,12 +212,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 5
n= 1000
*/
- val coefficientsR = Vectors.dense(-0.0844, 0.0677)
+ val regressionCoefficientsR = Vectors.dense(-0.0844, 0.0677)
val interceptR = 1.9206
val scaleR = 0.977
assert(model.intercept ~== interceptR relTol 1E-3)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
@@ -282,12 +282,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 6
n= 1000
*/
- val coefficientsR = Vectors.dense(0.896, -0.709)
+ val regressionCoefficientsR = Vectors.dense(0.896, -0.709)
val interceptR = 0.0
val scaleR = 1.52
assert(model.intercept === interceptR)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index a2a5c0bbdc..235c796d78 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -122,8 +122,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
label <- as.numeric(data$V1)
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.298698
@@ -131,17 +131,18 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 7.199082
*/
val interceptR = 6.298698
- val weightsR = Vectors.dense(4.700706, 7.199082)
+ val coefficientsR = Vectors.dense(4.700706, 7.199082)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -159,37 +160,37 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val modelWithoutIntercept2 = trainer2.fit(datasetWithDenseFeatureWithoutIntercept)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
intercept = FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data.V2. 6.995908
as.numeric.data.V3. 5.275131
*/
- val weightsR = Vectors.dense(6.995908, 5.275131)
+ val coefficientsR = Vectors.dense(6.995908, 5.275131)
assert(model1.intercept ~== 0 absTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
assert(model2.intercept ~== 0 absTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
/*
Then again with the data with no intercept:
- > weightsWithoutIntercept
+ > coefficientsWithoutIntercept
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data3.V2. 4.70011
as.numeric.data3.V3. 7.19943
*/
- val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
+ val coefficientsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
- assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR relTol 1E-3)
+ assert(modelWithoutIntercept1.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3)
assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
- assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR relTol 1E-3)
+ assert(modelWithoutIntercept2.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3)
}
}
@@ -211,8 +212,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian",
+ alpha = 1.0, lambda = 0.57 ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.24300
@@ -220,14 +222,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.679841
*/
val interceptR1 = 6.24300
- val weightsR1 = Vectors.dense(4.024821, 6.679841)
+ val coefficientsR1 = Vectors.dense(4.024821, 6.679841)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.416948
@@ -235,16 +237,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.724286
*/
val interceptR2 = 6.416948
- val weightsR2 = Vectors.dense(3.893869, 6.724286)
+ val coefficientsR2 = Vectors.dense(3.893869, 6.724286)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -269,9 +272,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- intercept=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, intercept=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -279,15 +282,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.772913
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(6.299752, 4.772913)
+ val coefficientsR1 = Vectors.dense(6.299752, 4.772913)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- intercept=FALSE, standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, intercept=FALSE, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -295,16 +298,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.764229
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(6.232193, 4.764229)
+ val coefficientsR2 = Vectors.dense(6.232193, 4.764229)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -321,8 +325,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 5.269376
@@ -330,15 +334,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.712356
*/
val interceptR1 = 5.269376
- val weightsR1 = Vectors.dense(3.736216, 5.712356)
+ val coefficientsR1 = Vectors.dense(3.736216, 5.712356)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 5.791109
@@ -346,15 +350,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.910406
*/
val interceptR2 = 5.791109
- val weightsR2 = Vectors.dense(3.435466, 5.910406)
+ val coefficientsR2 = Vectors.dense(3.435466, 5.910406)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -370,9 +375,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
intercept = FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -380,15 +385,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.214502
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(5.522875, 4.214502)
+ val coefficientsR1 = Vectors.dense(5.522875, 4.214502)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
intercept = FALSE, standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -396,15 +401,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.187419
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(5.263704, 4.187419)
+ val coefficientsR2 = Vectors.dense(5.263704, 4.187419)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -428,8 +434,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6 ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.324108
@@ -437,15 +444,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.200403
*/
val interceptR1 = 5.696056
- val weightsR1 = Vectors.dense(3.670489, 6.001122)
+ val coefficientsR1 = Vectors.dense(3.670489, 6.001122)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.114723
@@ -453,16 +460,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.146531
*/
val interceptR2 = 6.114723
- val weightsR2 = Vectors.dense(3.409937, 6.146531)
+ val coefficientsR2 = Vectors.dense(3.409937, 6.146531)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -487,9 +495,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
- intercept=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6, intercept=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -497,15 +505,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.dataM.V3. 4.322251
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(5.673348, 4.322251)
+ val coefficientsR1 = Vectors.dense(5.673348, 4.322251)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
- intercept=FALSE, standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6, intercept=FALSE, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -513,16 +521,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.297622
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(5.477988, 4.297622)
+ val coefficientsR2 = Vectors.dense(5.477988, 4.297622)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -554,7 +563,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val expectedResiduals = datasetWithDenseFeature.select("features", "label")
.map { case Row(features: DenseVector, label: Double) =>
val prediction =
- features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept
+ features(0) * model.coefficients(0) + features(1) * model.coefficients(1) +
+ model.intercept
label - prediction
}
.zip(model.summary.residuals.map(_.getDouble(0)))
@@ -663,9 +673,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model1a1 = trainer1a.fit(weightedData)
val model1b = trainer1b.fit(weightedData)
- assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+ assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
- assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+ assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
val trainer2a = (new LinearRegression).setFitIntercept(true)
@@ -675,9 +685,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2a0 = trainer2a.fit(data)
val model2a1 = trainer2a.fit(weightedData)
val model2b = trainer2b.fit(weightedData)
- assert(model2a0.weights !~= model2a1.weights absTol 1E-3)
+ assert(model2a0.coefficients !~= model2a1.coefficients absTol 1E-3)
assert(model2a0.intercept !~= model2a1.intercept absTol 1E-3)
- assert(model2a0.weights ~== model2b.weights absTol 1E-3)
+ assert(model2a0.coefficients ~== model2b.coefficients absTol 1E-3)
assert(model2a0.intercept ~== model2b.intercept absTol 1E-3)
val trainer3a = (new LinearRegression).setFitIntercept(false)
@@ -687,8 +697,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model3a0 = trainer3a.fit(data)
val model3a1 = trainer3a.fit(weightedData)
val model3b = trainer3b.fit(weightedData)
- assert(model3a0.weights !~= model3a1.weights absTol 1E-3)
- assert(model3a0.weights ~== model3b.weights absTol 1E-3)
+ assert(model3a0.coefficients !~= model3a1.coefficients absTol 1E-3)
+ assert(model3a0.coefficients ~== model3b.coefficients absTol 1E-3)
val trainer4a = (new LinearRegression).setFitIntercept(false)
.setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
@@ -697,8 +707,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model4a0 = trainer4a.fit(data)
val model4a1 = trainer4a.fit(weightedData)
val model4b = trainer4b.fit(weightedData)
- assert(model4a0.weights !~= model4a1.weights absTol 1E-3)
- assert(model4a0.weights ~== model4b.weights absTol 1E-3)
+ assert(model4a0.coefficients !~= model4a1.coefficients absTol 1E-3)
+ assert(model4a0.coefficients ~== model4b.coefficients absTol 1E-3)
}
}