From c020f7d9d43548d27ae4a9564ba38981fd530cb1 Mon Sep 17 00:00:00 2001
From: vectorijk
Date: Mon, 2 Nov 2015 16:12:04 -0800
Subject: [SPARK-10592] [ML] [PySpark] Deprecate weights and use coefficients
 instead in ML models

Deprecated in `LogisticRegression` and `LinearRegression`

Author: vectorijk

Closes #9311 from vectorijk/spark-10592.
---
 .../ml/classification/LogisticRegression.scala     |  11 +-
 .../org/apache/spark/ml/r/SparkRWrappers.scala     |  15 +-
 .../ml/regression/AFTSurvivalRegression.scala      |  32 ++--
 .../spark/ml/regression/IsotonicRegression.scala   |   4 +-
 .../spark/ml/regression/LinearRegression.scala     |  15 +-
 .../ml/classification/JavaOneVsRestSuite.java      |   6 +-
 .../classification/LogisticRegressionSuite.scala   | 152 ++++++++---------
 .../MultilayerPerceptronClassifierSuite.scala      |   6 +-
 .../spark/ml/classification/OneVsRestSuite.scala   |   6 +-
 .../ml/regression/AFTSurvivalRegressionSuite.scala |  12 +-
 .../ml/regression/LinearRegressionSuite.scala      | 184 +++++++++++----------
 11 files changed, 235 insertions(+), 208 deletions(-)
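Before the first file diff, a quick sketch of what this change looks like from the user side. This is illustrative only, not part of the patch; it assumes an existing DataFrame `training` with the usual "label"/"features" columns:

    import org.apache.spark.ml.classification.LogisticRegression

    // `training` is a hypothetical DataFrame with "label" and "features" columns.
    val model = new LogisticRegression().setMaxIter(10).fit(training)

    // As of this patch, model parameters are read via `coefficients`.
    println(model.coefficients)
    println(model.intercept)

    // `weights` still compiles, but now emits a deprecation warning
    // pointing at `coefficients` (deprecated since 1.6.0).
    println(model.weights)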
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 6f839ff4d7..a1335e7a1b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -392,11 +392,14 @@ class LogisticRegression(override val uid: String)
 @Experimental
 class LogisticRegressionModel private[ml] (
     override val uid: String,
-    val weights: Vector,
+    val coefficients: Vector,
     val intercept: Double)
   extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel]
   with LogisticRegressionParams {
 
+  @deprecated("Use coefficients instead.", "1.6.0")
+  def weights: Vector = coefficients
+
   override def setThreshold(value: Double): this.type = super.setThreshold(value)
 
   override def getThreshold: Double = super.getThreshold
@@ -407,7 +410,7 @@ class LogisticRegressionModel private[ml] (
 
   /** Margin (rawPrediction) for class label 1.  For binary classification only. */
   private val margin: Vector => Double = (features) => {
-    BLAS.dot(features, weights) + intercept
+    BLAS.dot(features, coefficients) + intercept
   }
 
   /** Score (probability) for class label 1.  For binary classification only. */
@@ -416,7 +419,7 @@ class LogisticRegressionModel private[ml] (
     1.0 / (1.0 + math.exp(-m))
   }
 
-  override val numFeatures: Int = weights.size
+  override val numFeatures: Int = coefficients.size
 
   override val numClasses: Int = 2
 
@@ -483,7 +486,7 @@ class LogisticRegressionModel private[ml] (
   }
 
   override def copy(extra: ParamMap): LogisticRegressionModel = {
-    val newModel = copyValues(new LogisticRegressionModel(uid, weights, intercept), extra)
+    val newModel = copyValues(new LogisticRegressionModel(uid, coefficients, intercept), extra)
     if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
     newModel.setParent(parent)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
index 21ebf6d916..9162ec0e4e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
@@ -51,13 +51,22 @@ private[r] object SparkRWrappers {
     pipeline.fit(df)
   }
 
+  @deprecated("Use getModelCoefficients instead.", "1.6.0")
   def getModelWeights(model: PipelineModel): Array[Double] = {
     model.stages.last match {
       case m: LinearRegressionModel =>
         Array(m.intercept) ++ m.weights.toArray
-      case _: LogisticRegressionModel =>
-        throw new UnsupportedOperationException(
-          "No weights available for LogisticRegressionModel")  // SPARK-9492
+      case m: LogisticRegressionModel =>
+        Array(m.intercept) ++ m.weights.toArray
+    }
+  }
+
+  def getModelCoefficients(model: PipelineModel): Array[Double] = {
+    model.stages.last match {
+      case m: LinearRegressionModel =>
+        Array(m.intercept) ++ m.coefficients.toArray
+      case m: LogisticRegressionModel =>
+        Array(m.intercept) ++ m.coefficients.toArray
     }
   }
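The wrapper above flattens a model into a single array with the intercept first, the ordering R users get back from coef(). A minimal sketch of that convention, with made-up values standing in for a fitted model's fields:

    import org.apache.spark.mllib.linalg.Vectors

    val intercept = 6.3                          // hypothetical fitted intercept
    val coefficients = Vectors.dense(4.7, 7.2)   // hypothetical fitted coefficients
    val flattened: Array[Double] = Array(intercept) ++ coefficients.toArray
    // flattened: Array(6.3, 4.7, 7.2) -- intercept first, like R's coef()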
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index ac2c3d825f..4dbbc7d399 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -200,17 +200,17 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
     val numFeatures = dataset.select($(featuresCol)).take(1)(0).getAs[Vector](0).size
     /*
-       The weights vector has three parts:
+       The coefficients vector has three parts:
        the first element: Double, log(sigma), the log of scale parameter
        the second element: Double, intercept of the beta parameter
        the third to the end elements: Doubles, regression coefficients vector of the beta parameter
      */
-    val initialWeights = Vectors.zeros(numFeatures + 2)
+    val initialCoefficients = Vectors.zeros(numFeatures + 2)
 
     val states = optimizer.iterations(new CachedDiffFunction(costFun),
-      initialWeights.toBreeze.toDenseVector)
+      initialCoefficients.toBreeze.toDenseVector)
 
-    val weights = {
+    val coefficients = {
       val arrayBuilder = mutable.ArrayBuilder.make[Double]
       var state: optimizer.State = null
       while (states.hasNext) {
@@ -227,10 +227,10 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
 
     if (handlePersistence) instances.unpersist()
 
-    val coefficients = Vectors.dense(weights.slice(2, weights.length))
-    val intercept = weights(1)
-    val scale = math.exp(weights(0))
-    val model = new AFTSurvivalRegressionModel(uid, coefficients, intercept, scale)
+    val regressionCoefficients = Vectors.dense(coefficients.slice(2, coefficients.length))
+    val intercept = coefficients(1)
+    val scale = math.exp(coefficients(0))
+    val model = new AFTSurvivalRegressionModel(uid, regressionCoefficients, intercept, scale)
     copyValues(model.setParent(this))
   }
 
@@ -251,7 +251,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
 @Since("1.6.0")
 class AFTSurvivalRegressionModel private[ml] (
     @Since("1.6.0") override val uid: String,
-    @Since("1.6.0") val coefficients: Vector,
+    @Since("1.6.0") val regressionCoefficients: Vector,
     @Since("1.6.0") val intercept: Double,
     @Since("1.6.0") val scale: Double)
   extends Model[AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams {
@@ -275,7 +275,7 @@ class AFTSurvivalRegressionModel private[ml] (
   @Since("1.6.0")
   def predictQuantiles(features: Vector): Vector = {
     // scale parameter for the Weibull distribution of lifetime
-    val lambda = math.exp(BLAS.dot(coefficients, features) + intercept)
+    val lambda = math.exp(BLAS.dot(regressionCoefficients, features) + intercept)
     // shape parameter for the Weibull distribution of lifetime
     val k = 1 / scale
     val quantiles = $(quantileProbabilities).map {
@@ -286,7 +286,7 @@ class AFTSurvivalRegressionModel private[ml] (
 
   @Since("1.6.0")
   def predict(features: Vector): Double = {
-    math.exp(BLAS.dot(coefficients, features) + intercept)
+    math.exp(BLAS.dot(regressionCoefficients, features) + intercept)
   }
 
   @Since("1.6.0")
@@ -309,7 +309,7 @@ class AFTSurvivalRegressionModel private[ml] (
 
   @Since("1.6.0")
   override def copy(extra: ParamMap): AFTSurvivalRegressionModel = {
-    copyValues(new AFTSurvivalRegressionModel(uid, coefficients, intercept, scale), extra)
+    copyValues(new AFTSurvivalRegressionModel(uid, regressionCoefficients, intercept, scale), extra)
       .setParent(parent)
   }
 }
@@ -369,17 +369,17 @@ class AFTSurvivalRegressionModel private[ml] (
  *    \frac{\partial (-\iota)}{\partial (\log\sigma)}=
  *    \sum_{i=1}^{n}[\delta_{i}+(\delta_{i}-e^{\epsilon_{i}})\epsilon_{i}]
  *   }}}
- * @param weights The log of scale parameter, the intercept and
+ * @param coefficients Includes three parts: the log of the scale parameter, the intercept, and
  *                regression coefficients corresponding to the features.
  * @param fitIntercept Whether to fit an intercept term.
  */
-private class AFTAggregator(weights: BDV[Double], fitIntercept: Boolean)
+private class AFTAggregator(coefficients: BDV[Double], fitIntercept: Boolean)
   extends Serializable {
 
   // beta is the intercept and regression coefficients to the covariates
-  private val beta = weights.slice(1, weights.length)
+  private val beta = coefficients.slice(1, coefficients.length)
   // sigma is the scale parameter of the AFT model
-  private val sigma = math.exp(weights(0))
+  private val sigma = math.exp(coefficients(0))
 
   private var totalCnt: Long = 0L
   private var lossSum = 0.0
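The comment in the train method above fixes the layout of the packed solution vector: log(sigma) first, then the intercept, then the betas. A sketch of unpacking it with Breeze, using made-up numbers for a two-feature model:

    import breeze.linalg.{DenseVector => BDV}

    // Hypothetical packed solution: [log(sigma), intercept, beta1, beta2].
    val packed = BDV(0.5, 1.2, -0.3, 0.8)

    val sigma = math.exp(packed(0))             // Weibull scale parameter
    val intercept = packed(1)
    val beta = packed.slice(2, packed.length)   // regression coefficients

    // Mean-lifetime prediction, mirroring AFTSurvivalRegressionModel.predict:
    val features = BDV(2.0, 1.0)
    val prediction = math.exp((beta dot features) + intercept)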
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index 2ff500f291..f4a17c8f9a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -87,8 +87,8 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
       lit(1.0)
     }
     dataset.select(col($(labelCol)), f, w)
-      .map { case Row(label: Double, feature: Double, weights: Double) =>
-        (label, feature, weights)
+      .map { case Row(label: Double, feature: Double, weight: Double) =>
+        (label, feature, weight)
       }
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index f663b9bd9a..6e9c7442b8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -203,7 +203,7 @@ class LinearRegression(override val uid: String)
     val yMean = ySummarizer.mean(0)
     val yStd = math.sqrt(ySummarizer.variance(0))
 
-    // If the yStd is zero, then the intercept is yMean with zero weights;
+    // If the yStd is zero, then the intercept is yMean with zero coefficient;
     // as a result, training is not needed.
     if (yStd == 0.0) {
       logWarning(s"The standard deviation of the label is zero, so the coefficients will be " +
@@ -331,14 +331,17 @@ class LinearRegression(override val uid: String)
 @Experimental
 class LinearRegressionModel private[ml] (
     override val uid: String,
-    val weights: Vector,
+    val coefficients: Vector,
     val intercept: Double)
   extends RegressionModel[Vector, LinearRegressionModel]
   with LinearRegressionParams {
 
   private var trainingSummary: Option[LinearRegressionTrainingSummary] = None
 
-  override val numFeatures: Int = weights.size
+  @deprecated("Use coefficients instead.", "1.6.0")
+  def weights: Vector = coefficients
+
+  override val numFeatures: Int = coefficients.size
 
   /**
    * Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is
@@ -387,11 +390,11 @@ class LinearRegressionModel private[ml] (
 
   override protected def predict(features: Vector): Double = {
-    dot(features, weights) + intercept
+    dot(features, coefficients) + intercept
   }
 
   override def copy(extra: ParamMap): LinearRegressionModel = {
-    val newModel = copyValues(new LinearRegressionModel(uid, weights, intercept), extra)
+    val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra)
     if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
     newModel.setParent(parent)
   }
@@ -400,7 +403,7 @@ class LinearRegressionModel private[ml] (
 /**
  * :: Experimental ::
  * Linear regression training results. Currently, the training summary ignores the
- * training weights except for the objective trace.
+ * training coefficients except for the objective trace.
  * @param predictions predictions outputted by the model's `transform` method.
  * @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
  */
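The yStd == 0 shortcut noted in the LinearRegression hunk above is easy to exercise: when every label is the same constant, the fitted intercept should be that constant and the coefficients all zero. A sketch under that assumption (`sc`/`sqlContext` are the usual shell contexts; the data is made up):

    import org.apache.spark.ml.regression.LinearRegression
    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.regression.LabeledPoint

    val constantLabel = sqlContext.createDataFrame(sc.parallelize(Seq(
      LabeledPoint(3.0, Vectors.dense(1.0, 2.0)),
      LabeledPoint(3.0, Vectors.dense(2.0, 1.0)),
      LabeledPoint(3.0, Vectors.dense(0.5, 4.0)))))

    val model = new LinearRegression().fit(constantLabel)
    // Expected: intercept == yMean == 3.0, coefficients == (0.0, 0.0).
    println((model.intercept, model.coefficients))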
diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
index 253cabf013..cbabafe1b5 100644
--- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
@@ -47,16 +47,16 @@ public class JavaOneVsRestSuite implements Serializable {
         jsql = new SQLContext(jsc);
         int nPoints = 3;
 
-        // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2.
+        // The following coefficients and xMean/xVariance are computed from iris dataset with lambda=0.2.
         // As a result, we are drawing samples from probability distribution of an actual model.
-        double[] weights = {
+        double[] coefficients = {
                 -0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
                 -0.16624, -0.84355, -0.048509, -0.301789, 4.170682 };
         double[] xMean = {5.843, 3.057, 3.758, 1.199};
         double[] xVariance = {0.6856, 0.1899, 3.116, 0.581};
         List<LabeledPoint> points = JavaConverters.seqAsJavaListConverter(
-                generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+                generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
         ).asJava();
         datasetRDD = jsc.parallelize(points, 2);
         dataset = jsql.createDataFrame(datasetRDD, LabeledPoint.class);
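The test suites below repeatedly draw synthetic samples through generateMultinomialLogisticInput. Its argument order, as inferred from the call sites in this patch (the helper lives in the mllib test object LogisticRegressionSuite, so this only compiles in test scope):

    import org.apache.spark.mllib.classification.LogisticRegressionSuite.generateMultinomialLogisticInput

    // (coefficients, feature means, feature variances, addIntercept, nPoints, seed)
    val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
    val xMean = Array(5.843, 3.057, 3.758, 1.199)
    val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
    val testData = generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, 100, 42)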
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index e0a795e5e0..325faf37e8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -48,21 +48,22 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       import org.apache.spark.mllib.classification.LogisticRegressionSuite
       val nPoints = 10000
-      val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+      val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
       val xMean = Array(5.843, 3.057, 3.758, 1.199)
       val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
       val data = sc.parallelize(LogisticRegressionSuite.generateMultinomialLogisticInput(
-        weights, xMean, xVariance, true, nPoints, 42), 1)
+        coefficients, xMean, xVariance, true, nPoints, 42), 1)
       data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1) + ", " +
         x.features(2) + ", " + x.features(3)).saveAsTextFile("path")
      */
     binaryDataset = {
       val nPoints = 10000
-      val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+      val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
       val xMean = Array(5.843, 3.057, 3.758, 1.199)
       val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
 
-      val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+      val testData =
+        generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
 
       sqlContext.createDataFrame(sc.parallelize(testData, 4))
     }
@@ -296,8 +297,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
-       weights
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
@@ -308,14 +309,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.7996864
      */
     val interceptR = 2.8366423
-    val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
+    val coefficientsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
 
     assert(model1.intercept ~== interceptR relTol 1E-3)
-    assert(model1.weights ~= weightsR relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR relTol 1E-3)
 
     // Without regularization, with or without standardization will converge to the same solution.
     assert(model2.intercept ~== interceptR relTol 1E-3)
-    assert(model2.weights ~= weightsR relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR relTol 1E-3)
   }
 
   test("binary logistic regression without intercept without regularization") {
@@ -332,9 +333,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights =
+       coefficients =
            coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0, intercept=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
@@ -345,14 +346,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.7407946
      */
     val interceptR = 0.0
-    val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
+    val coefficientsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
 
     assert(model1.intercept ~== interceptR relTol 1E-3)
-    assert(model1.weights ~= weightsR relTol 1E-2)
+    assert(model1.coefficients ~= coefficientsR relTol 1E-2)
 
     // Without regularization, with or without standardization should converge to the same solution.
     assert(model2.intercept ~== interceptR relTol 1E-3)
-    assert(model2.weights ~= weightsR relTol 1E-2)
+    assert(model2.coefficients ~= coefficientsR relTol 1E-2)
   }
 
   test("binary logistic regression with intercept with L1 regularization") {
@@ -371,8 +372,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
-       weights
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -383,10 +384,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.02481551
      */
     val interceptR1 = -0.05627428
-    val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
+    val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-2)
-    assert(model1.weights ~= weightsR1 absTol 2E-2)
+    assert(model1.coefficients ~= coefficientsR1 absTol 2E-2)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -395,9 +396,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
            standardize=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -408,10 +409,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     .
      */
     val interceptR2 = 0.3722152
-    val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
+    val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
 
     assert(model2.intercept ~== interceptR2 relTol 1E-2)
-    assert(model2.weights ~= weightsR2 absTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
   }
 
   test("binary logistic regression without intercept with L1 regularization") {
@@ -430,9 +431,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
            intercept=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -443,10 +444,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.03891782
      */
     val interceptR1 = 0.0
-    val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
+    val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights ~= weightsR1 absTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 absTol 1E-3)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -455,9 +456,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
            intercept=FALSE, standardize=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -468,10 +469,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     .
      */
     val interceptR2 = 0.0
-    val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
+    val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
 
     assert(model2.intercept ~== interceptR2 absTol 1E-3)
-    assert(model2.weights ~= weightsR2 absTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
   }
 
   test("binary logistic regression with intercept with L2 regularization") {
@@ -490,8 +491,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
-       weights
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -502,10 +503,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.10062872
      */
     val interceptR1 = 0.15021751
-    val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
+    val coefficientsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -514,9 +515,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
            standardize=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -527,10 +528,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.06266838
      */
     val interceptR2 = 0.48657516
-    val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
+    val coefficientsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
 
     assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
   }
 
   test("binary logistic regression without intercept with L2 regularization") {
@@ -549,9 +550,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
            intercept=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -562,10 +563,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.09799775
      */
     val interceptR1 = 0.0
-    val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
+    val coefficientsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
 
     assert(model1.intercept ~== interceptR1 absTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-2)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -574,9 +575,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
            intercept=FALSE, standardize=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -587,10 +588,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.053314311
      */
     val interceptR2 = 0.0
-    val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
+    val coefficientsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
 
     assert(model2.intercept ~== interceptR2 absTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-2)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)
   }
 
   test("binary logistic regression with intercept with ElasticNet regularization") {
@@ -609,8 +610,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
-       weights
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -621,10 +622,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.15458796
      */
     val interceptR1 = 0.57734851
-    val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
+    val coefficientsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
 
     assert(model1.intercept ~== interceptR1 relTol 6E-3)
-    assert(model1.weights ~== weightsR1 absTol 5E-3)
+    assert(model1.coefficients ~== coefficientsR1 absTol 5E-3)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -633,9 +634,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
            standardize=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -646,10 +647,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.05350074
      */
     val interceptR2 = 0.51555993
-    val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
+    val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
 
     assert(model2.intercept ~== interceptR2 relTol 6E-3)
-    assert(model2.weights ~= weightsR2 absTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
   }
 
   test("binary logistic regression without intercept with ElasticNet regularization") {
@@ -668,9 +669,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
            intercept=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -681,10 +682,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5    -0.142534158
      */
     val interceptR1 = 0.0
-    val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
+    val coefficientsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights ~= weightsR1 absTol 1E-2)
+    assert(model1.coefficients ~= coefficientsR1 absTol 1E-2)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -693,9 +694,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
            intercept=FALSE, standardize=FALSE))
-       weights
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -706,10 +707,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     .
      */
     val interceptR2 = 0.0
-    val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
+    val coefficientsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
 
     assert(model2.intercept ~== interceptR2 absTol 1E-3)
-    assert(model2.weights ~= weightsR2 absTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
   }
 
   test("binary logistic regression with intercept with strong L1 regularization") {
@@ -732,8 +733,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     }).histogram
 
     /*
-       For binary logistic regression with strong L1 regularization, all the weights will be zeros.
-       As a result,
+       For binary logistic regression with strong L1 regularization, all the coefficients
+       will be zeros. As a result,
        {{{
        P(0) = 1 / (1 + \exp(b)), and
        P(1) = \exp(b) / (1 + \exp(b))
@@ -743,13 +744,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        }}}
      */
     val interceptTheory = math.log(histogram(1) / histogram(0))
-    val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+    val coefficientsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
 
     assert(model1.intercept ~== interceptTheory relTol 1E-5)
-    assert(model1.weights ~= weightsTheory absTol 1E-6)
+    assert(model1.coefficients ~= coefficientsTheory absTol 1E-6)
 
     assert(model2.intercept ~== interceptTheory relTol 1E-5)
-    assert(model2.weights ~= weightsTheory absTol 1E-6)
+    assert(model2.coefficients ~= coefficientsTheory absTol 1E-6)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -758,8 +759,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE)
        label = factor(data$V1)
        features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
-       weights = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
-       weights
+       coefficients = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
+       coefficients
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
@@ -770,10 +771,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     .
      */
     val interceptR = -0.248065
-    val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+    val coefficientsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
 
     assert(model1.intercept ~== interceptR relTol 1E-5)
-    assert(model1.weights ~== weightsR absTol 1E-6)
+    assert(model1.coefficients ~== coefficientsR absTol 1E-6)
   }
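The closed-form intercept used by the strong-L1 test above follows from P(1)/P(0) = exp(b) when all coefficients are zero, so b = log(count_1 / count_0). As a standalone check with hypothetical label counts:

    // Hypothetical class counts standing in for the test's histogram.
    val count0 = 7781.0
    val count1 = 2219.0
    val interceptTheory = math.log(count1 / count0)
    // A model fit with a large enough L1 penalty should match this
    // up to the test's 1E-5 relative tolerance.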
 
   test("evaluate on test set") {
@@ -814,10 +815,11 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
   test("binary logistic regression with weighted samples") {
     val (dataset, weightedDataset) = {
       val nPoints = 1000
-      val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+      val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
       val xMean = Array(5.843, 3.057, 3.758, 1.199)
       val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
-      val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+      val testData =
+        generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
 
       // Let's over-sample the positive samples twice.
       val data1 = testData.flatMap { case labeledPoint: LabeledPoint =>
@@ -863,9 +865,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val model1a0 = trainer1a.fit(dataset)
     val model1a1 = trainer1a.fit(weightedDataset)
     val model1b = trainer1b.fit(weightedDataset)
-    assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+    assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
     assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
-    assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+    assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
     assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
   }
 }
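The weighted-samples test that closes the suite above leans on an equivalence worth stating plainly: a row with weight 2.0 should behave like the same row duplicated. A sketch of that setup (the DataFrames are hypothetical; setWeightCol follows the suite's usage):

    import org.apache.spark.ml.classification.LogisticRegression

    // `dupped`: every positive row appears twice, no weight column.
    // `weighted`: every positive row appears once, with 2.0 in a "weight" column.
    val plain = new LogisticRegression().setMaxIter(10)
    val withWeights = new LogisticRegression().setMaxIter(10).setWeightCol("weight")

    // Expectation mirrored by the suite: the two fits agree within 1E-3.
    // plain.fit(dupped).coefficients  ~==  withWeights.fit(weighted).coefficients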
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
index 2d1df9b2b8..17db8c4477 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
@@ -53,16 +53,16 @@ class MultilayerPerceptronClassifierSuite extends SparkFunSuite with MLlibTestSp
   test("3 class classification with 2 hidden layers") {
     val nPoints = 1000
 
-    // The following weights are taken from OneVsRestSuite.scala
+    // The following coefficients are taken from OneVsRestSuite.scala
     // they represent 3-class iris dataset
-    val weights = Array(
+    val coefficients = Array(
       -0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
       -0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
     val xMean = Array(5.843, 3.057, 3.758, 1.199)
     val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
     val rdd = sc.parallelize(generateMultinomialLogisticInput(
-      weights, xMean, xVariance, true, nPoints, 42), 2)
+      coefficients, xMean, xVariance, true, nPoints, 42), 2)
     val dataFrame = sqlContext.createDataFrame(rdd).toDF("label", "features")
     val numClasses = 3
     val numIterations = 100
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 977f0e0b70..5ea71c5317 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -43,16 +43,16 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext {
 
     val nPoints = 1000
 
-    // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2.
+    // The following coefficients and xMean/xVariance are computed from iris dataset with lambda=0.2
     // As a result, we are drawing samples from probability distribution of an actual model.
-    val weights = Array(
+    val coefficients = Array(
       -0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
       -0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
     val xMean = Array(5.843, 3.057, 3.758, 1.199)
     val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
     rdd = sc.parallelize(generateMultinomialLogisticInput(
-      weights, xMean, xVariance, true, nPoints, 42), 2)
+      coefficients, xMean, xVariance, true, nPoints, 42), 2)
     dataset = sqlContext.createDataFrame(rdd)
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
index 359f310271..c0f791bce1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
@@ -141,12 +141,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
        Number of Newton-Raphson Iterations: 5
        n= 1000
      */
-    val coefficientsR = Vectors.dense(-0.039)
+    val regressionCoefficientsR = Vectors.dense(-0.039)
     val interceptR = 1.759
     val scaleR = 1.41
 
     assert(model.intercept ~== interceptR relTol 1E-3)
-    assert(model.coefficients ~== coefficientsR relTol 1E-3)
+    assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
     assert(model.scale ~== scaleR relTol 1E-3)
 
     /*
@@ -212,12 +212,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
        Number of Newton-Raphson Iterations: 5
        n= 1000
      */
-    val coefficientsR = Vectors.dense(-0.0844, 0.0677)
+    val regressionCoefficientsR = Vectors.dense(-0.0844, 0.0677)
     val interceptR = 1.9206
     val scaleR = 0.977
 
     assert(model.intercept ~== interceptR relTol 1E-3)
-    assert(model.coefficients ~== coefficientsR relTol 1E-3)
+    assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
     assert(model.scale ~== scaleR relTol 1E-3)
 
     /*
@@ -282,12 +282,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
        Number of Newton-Raphson Iterations: 6
        n= 1000
      */
-    val coefficientsR = Vectors.dense(0.896, -0.709)
+    val regressionCoefficientsR = Vectors.dense(0.896, -0.709)
     val interceptR = 0.0
     val scaleR = 1.52
 
     assert(model.intercept === interceptR)
-    assert(model.coefficients ~== coefficientsR relTol 1E-3)
+    assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
     assert(model.scale ~== scaleR relTol 1E-3)
 
     /*
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index a2a5c0bbdc..235c796d78 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -122,8 +122,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
        features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
        label <- as.numeric(data$V1)
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)       6.298698
@@ -131,17 +131,18 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 7.199082
      */
     val interceptR = 6.298698
-    val weightsR = Vectors.dense(4.700706, 7.199082)
+    val coefficientsR = Vectors.dense(4.700706, 7.199082)
 
     assert(model1.intercept ~== interceptR relTol 1E-3)
-    assert(model1.weights ~= weightsR relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR relTol 1E-3)
     assert(model2.intercept ~== interceptR relTol 1E-3)
-    assert(model2.weights ~= weightsR relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR relTol 1E-3)
 
     model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
       case Row(features: DenseVector, prediction1: Double) =>
         val prediction2 =
-          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+          features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+            model1.intercept
         assert(prediction1 ~== prediction2 relTol 1E-5)
     }
   }
@@ -159,37 +160,37 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val modelWithoutIntercept2 = trainer2.fit(datasetWithDenseFeatureWithoutIntercept)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
          intercept = FALSE))
-       > weights
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)         .
        as.numeric.data.V2. 6.995908
        as.numeric.data.V3. 5.275131
      */
-    val weightsR = Vectors.dense(6.995908, 5.275131)
+    val coefficientsR = Vectors.dense(6.995908, 5.275131)
 
     assert(model1.intercept ~== 0 absTol 1E-3)
-    assert(model1.weights ~= weightsR relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR relTol 1E-3)
     assert(model2.intercept ~== 0 absTol 1E-3)
-    assert(model2.weights ~= weightsR relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR relTol 1E-3)
 
     /*
       Then again with the data with no intercept:
-      > weightsWithoutIntercept
+      > coefficientsWithoutIntercept
       3 x 1 sparse Matrix of class "dgCMatrix"
                                s0
       (Intercept)           .
       as.numeric.data3.V2. 4.70011
       as.numeric.data3.V3. 7.19943
      */
-    val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
+    val coefficientsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
 
     assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
-    assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR relTol 1E-3)
+    assert(modelWithoutIntercept1.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3)
     assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
-    assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR relTol 1E-3)
+    assert(modelWithoutIntercept2.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3)
   }
 }
@@ -211,8 +212,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val model2 = trainer2.fit(datasetWithDenseFeature)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian",
+         alpha = 1.0, lambda = 0.57 ))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)       6.24300
@@ -220,14 +222,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 6.679841
      */
     val interceptR1 = 6.24300
-    val weightsR1 = Vectors.dense(4.024821, 6.679841)
+    val coefficientsR1 = Vectors.dense(4.024821, 6.679841)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
-         standardize=FALSE))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+         lambda = 0.57, standardize=FALSE ))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)       6.416948
@@ -235,16 +237,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 6.724286
      */
     val interceptR2 = 6.416948
-    val weightsR2 = Vectors.dense(3.893869, 6.724286)
+    val coefficientsR2 = Vectors.dense(3.893869, 6.724286)
 
     assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
 
     model1.transform(datasetWithDenseFeature).select("features", "prediction")
       .collect().foreach {
       case Row(features: DenseVector, prediction1: Double) =>
-        val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
-          model1.intercept
+        val prediction2 =
+          features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+            model1.intercept
         assert(prediction1 ~== prediction2 relTol 1E-5)
     }
   }
@@ -269,9 +272,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val model2 = trainer2.fit(datasetWithDenseFeature)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
-         intercept=FALSE))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+         lambda = 0.57, intercept=FALSE ))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                  s0
        (Intercept)         .
@@ -279,15 +282,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 4.772913
      */
     val interceptR1 = 0.0
-    val weightsR1 = Vectors.dense(6.299752, 4.772913)
+    val coefficientsR1 = Vectors.dense(6.299752, 4.772913)
 
     assert(model1.intercept ~== interceptR1 absTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
-         intercept=FALSE, standardize=FALSE))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+         lambda = 0.57, intercept=FALSE, standardize=FALSE ))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                  s0
        (Intercept)         .
@@ -295,16 +298,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 4.764229
      */
     val interceptR2 = 0.0
-    val weightsR2 = Vectors.dense(6.232193, 4.764229)
+    val coefficientsR2 = Vectors.dense(6.232193, 4.764229)
 
     assert(model2.intercept ~== interceptR2 absTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
 
     model1.transform(datasetWithDenseFeature).select("features", "prediction")
       .collect().foreach {
       case Row(features: DenseVector, prediction1: Double) =>
-        val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
-          model1.intercept
+        val prediction2 =
+          features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+            model1.intercept
         assert(prediction1 ~== prediction2 relTol 1E-5)
     }
   }
@@ -321,8 +325,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val model2 = trainer2.fit(datasetWithDenseFeature)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)       5.269376
@@ -330,15 +334,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 5.712356)
      */
     val interceptR1 = 5.269376
-    val weightsR1 = Vectors.dense(3.736216, 5.712356)
+    val coefficientsR1 = Vectors.dense(3.736216, 5.712356)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
          standardize=FALSE))
-       > weights
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)       5.791109
@@ -346,15 +350,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 5.910406
      */
     val interceptR2 = 5.791109
-    val weightsR2 = Vectors.dense(3.435466, 5.910406)
+    val coefficientsR2 = Vectors.dense(3.435466, 5.910406)
 
     assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
 
     model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
       case Row(features: DenseVector, prediction1: Double) =>
         val prediction2 =
-          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+          features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+            model1.intercept
         assert(prediction1 ~== prediction2 relTol 1E-5)
     }
   }
@@ -370,9 +375,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val model2 = trainer2.fit(datasetWithDenseFeature)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
          intercept = FALSE))
-       > weights
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                  s0
        (Intercept)         .
@@ -380,15 +385,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 4.214502
      */
     val interceptR1 = 0.0
-    val weightsR1 = Vectors.dense(5.522875, 4.214502)
+    val coefficientsR1 = Vectors.dense(5.522875, 4.214502)
 
     assert(model1.intercept ~== interceptR1 absTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
          intercept = FALSE, standardize=FALSE))
-       > weights
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                  s0
        (Intercept)         .
@@ -396,15 +401,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 4.187419
      */
     val interceptR2 = 0.0
-    val weightsR2 = Vectors.dense(5.263704, 4.187419)
+    val coefficientsR2 = Vectors.dense(5.263704, 4.187419)
 
     assert(model2.intercept ~== interceptR2 absTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
 
     model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
       case Row(features: DenseVector, prediction1: Double) =>
         val prediction2 =
-          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+          features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+            model1.intercept
         assert(prediction1 ~== prediction2 relTol 1E-5)
     }
   }
@@ -428,8 +434,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val model2 = trainer2.fit(datasetWithDenseFeature)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+         lambda = 1.6 ))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)       6.324108
@@ -437,15 +444,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 5.200403
      */
     val interceptR1 = 5.696056
-    val weightsR1 = Vectors.dense(3.670489, 6.001122)
+    val coefficientsR1 = Vectors.dense(3.670489, 6.001122)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
          standardize=FALSE))
-       > weights
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                 s0
        (Intercept)       6.114723
@@ -453,16 +460,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 6.146531
      */
     val interceptR2 = 6.114723
-    val weightsR2 = Vectors.dense(3.409937, 6.146531)
+    val coefficientsR2 = Vectors.dense(3.409937, 6.146531)
 
     assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
 
     model1.transform(datasetWithDenseFeature).select("features", "prediction")
      .collect().foreach {
       case Row(features: DenseVector, prediction1: Double) =>
-        val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
-          model1.intercept
+        val prediction2 =
+          features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+            model1.intercept
         assert(prediction1 ~== prediction2 relTol 1E-5)
     }
   }
@@ -487,9 +495,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     val model2 = trainer2.fit(datasetWithDenseFeature)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
-         intercept=FALSE))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+         lambda = 1.6, intercept=FALSE ))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                  s0
        (Intercept)          .
@@ -497,15 +505,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.dataM.V3. 4.322251
      */
     val interceptR1 = 0.0
-    val weightsR1 = Vectors.dense(5.673348, 4.322251)
+    val coefficientsR1 = Vectors.dense(5.673348, 4.322251)
 
     assert(model1.intercept ~== interceptR1 absTol 1E-3)
-    assert(model1.weights ~= weightsR1 relTol 1E-3)
+    assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
 
     /*
-       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
-         intercept=FALSE, standardize=FALSE))
-       > weights
+       coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+         lambda = 1.6, intercept=FALSE, standardize=FALSE ))
+       > coefficients
        3 x 1 sparse Matrix of class "dgCMatrix"
                                  s0
        (Intercept)         .
@@ -513,16 +521,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        as.numeric.data.V3. 4.297622
      */
     val interceptR2 = 0.0
-    val weightsR2 = Vectors.dense(5.477988, 4.297622)
+    val coefficientsR2 = Vectors.dense(5.477988, 4.297622)
 
     assert(model2.intercept ~== interceptR2 absTol 1E-3)
-    assert(model2.weights ~= weightsR2 relTol 1E-3)
+    assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
 
     model1.transform(datasetWithDenseFeature).select("features", "prediction")
       .collect().foreach {
       case Row(features: DenseVector, prediction1: Double) =>
-        val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
-          model1.intercept
+        val prediction2 =
+          features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+            model1.intercept
         assert(prediction1 ~== prediction2 relTol 1E-5)
     }
   }
@@ -554,7 +563,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       val expectedResiduals = datasetWithDenseFeature.select("features", "label")
         .map { case Row(features: DenseVector, label: Double) =>
           val prediction =
-            features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept
+            features(0) * model.coefficients(0) + features(1) * model.coefficients(1) +
+              model.intercept
           label - prediction
         }
         .zip(model.summary.residuals.map(_.getDouble(0)))
@@ -663,9 +673,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       val model1a1 = trainer1a.fit(weightedData)
       val model1b = trainer1b.fit(weightedData)
 
-      assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+      assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
       assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
-      assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+      assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
       assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
 
       val trainer2a = (new LinearRegression).setFitIntercept(true)
@@ -675,9 +685,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       val model2a0 = trainer2a.fit(data)
       val model2a1 = trainer2a.fit(weightedData)
      val model2b = trainer2b.fit(weightedData)
-      assert(model2a0.weights !~= model2a1.weights absTol 1E-3)
+      assert(model2a0.coefficients !~= model2a1.coefficients absTol 1E-3)
       assert(model2a0.intercept !~= model2a1.intercept absTol 1E-3)
-      assert(model2a0.weights ~== model2b.weights absTol 1E-3)
+      assert(model2a0.coefficients ~== model2b.coefficients absTol 1E-3)
       assert(model2a0.intercept ~== model2b.intercept absTol 1E-3)
 
       val trainer3a = (new LinearRegression).setFitIntercept(false)
@@ -687,8 +697,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       val model3a0 = trainer3a.fit(data)
       val model3a1 = trainer3a.fit(weightedData)
       val model3b = trainer3b.fit(weightedData)
-      assert(model3a0.weights !~= model3a1.weights absTol 1E-3)
-      assert(model3a0.weights ~== model3b.weights absTol 1E-3)
+      assert(model3a0.coefficients !~= model3a1.coefficients absTol 1E-3)
+      assert(model3a0.coefficients ~== model3b.coefficients absTol 1E-3)
 
       val trainer4a = (new LinearRegression).setFitIntercept(false)
         .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
@@ -697,8 +707,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       val model4a0 = trainer4a.fit(data)
       val model4a1 = trainer4a.fit(weightedData)
       val model4b = trainer4b.fit(weightedData)
-      assert(model4a0.weights !~= model4a1.weights absTol 1E-3)
-      assert(model4a0.weights ~== model4b.weights absTol 1E-3)
+      assert(model4a0.coefficients !~= model4a1.coefficients absTol 1E-3)
+      assert(model4a0.coefficients ~== model4b.coefficients absTol 1E-3)
     }
   }
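A closing note on the hand-rolled prediction checks that recur through LinearRegressionSuite: for a two-feature model they are plain dot-product arithmetic. The same computation standalone, reusing the no-regularization reference values quoted earlier:

    import org.apache.spark.mllib.linalg.Vectors

    val coefficients = Vectors.dense(4.700706, 7.199082)
    val intercept = 6.298698
    val features = Vectors.dense(1.0, 2.0)   // made-up input point

    val prediction =
      features(0) * coefficients(0) + features(1) * coefficients(1) + intercept
    // 4.700706 * 1.0 + 7.199082 * 2.0 + 6.298698 = 25.397568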