authorvectorijk <jiangkai@gmail.com>2015-11-02 16:12:04 -0800
committerDB Tsai <dbt@netflix.com>2015-11-02 16:12:04 -0800
commitc020f7d9d43548d27ae4a9564ba38981fd530cb1 (patch)
tree8dc46ed1b48d88852323747b2d86aedd1c770b64 /mllib
parentec03866a7ef2d0826520755d47c8c9480148a76c (diff)
[SPARK-10592] [ML] [PySpark] Deprecate weights and use coefficients instead in ML models
Deprecated `weights` in favor of `coefficients` in `LogisticRegression` and `LinearRegression`.

Author: vectorijk <jiangkai@gmail.com>

Closes #9311 from vectorijk/spark-10592.
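The change follows one pattern throughout: rename the stored vector to `coefficients` and keep `weights` as a deprecated forwarder. A minimal sketch of that pattern (`ExampleModel` is hypothetical, not a class in this patch):

```scala
import org.apache.spark.mllib.linalg.Vector

// Hypothetical model illustrating the pattern applied below: the public
// field is renamed to `coefficients`, and `weights` is kept as a
// deprecated alias so existing callers still compile, with a warning.
class ExampleModel(val coefficients: Vector) {
  @deprecated("Use coefficients instead.", "1.6.0")
  def weights: Vector = coefficients
}
```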
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala              |  11
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala                               |  15
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala               |  32
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala                  |   4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala                    |  15
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java                |   6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala         | 152
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala |   6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala                  |   6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala          |  12
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala               | 184
11 files changed, 235 insertions(+), 208 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 6f839ff4d7..a1335e7a1b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -392,11 +392,14 @@ class LogisticRegression(override val uid: String)
@Experimental
class LogisticRegressionModel private[ml] (
override val uid: String,
- val weights: Vector,
+ val coefficients: Vector,
val intercept: Double)
extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel]
with LogisticRegressionParams {
+ @deprecated("Use coefficients instead.", "1.6.0")
+ def weights: Vector = coefficients
+
override def setThreshold(value: Double): this.type = super.setThreshold(value)
override def getThreshold: Double = super.getThreshold
@@ -407,7 +410,7 @@ class LogisticRegressionModel private[ml] (
/** Margin (rawPrediction) for class label 1. For binary classification only. */
private val margin: Vector => Double = (features) => {
- BLAS.dot(features, weights) + intercept
+ BLAS.dot(features, coefficients) + intercept
}
/** Score (probability) for class label 1. For binary classification only. */
@@ -416,7 +419,7 @@ class LogisticRegressionModel private[ml] (
1.0 / (1.0 + math.exp(-m))
}
- override val numFeatures: Int = weights.size
+ override val numFeatures: Int = coefficients.size
override val numClasses: Int = 2
@@ -483,7 +486,7 @@ class LogisticRegressionModel private[ml] (
}
override def copy(extra: ParamMap): LogisticRegressionModel = {
- val newModel = copyValues(new LogisticRegressionModel(uid, weights, intercept), extra)
+ val newModel = copyValues(new LogisticRegressionModel(uid, coefficients, intercept), extra)
if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
newModel.setParent(parent)
}
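A hedged usage sketch of the renamed accessor; `training` (a DataFrame with "label" and "features" columns) is an assumption, not part of this patch:

```scala
import org.apache.spark.ml.classification.LogisticRegression

val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.01)
val model = lr.fit(training)  // `training` assumed to exist

println(model.coefficients)   // preferred name as of 1.6.0
println(model.weights)        // still compiles, with a deprecation warning
```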
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
index 21ebf6d916..9162ec0e4e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
@@ -51,13 +51,22 @@ private[r] object SparkRWrappers {
pipeline.fit(df)
}
+ @deprecated("Use getModelCoefficients instead.", "1.6.0")
def getModelWeights(model: PipelineModel): Array[Double] = {
model.stages.last match {
case m: LinearRegressionModel =>
Array(m.intercept) ++ m.weights.toArray
- case _: LogisticRegressionModel =>
- throw new UnsupportedOperationException(
- "No weights available for LogisticRegressionModel") // SPARK-9492
+ case m: LogisticRegressionModel =>
+ Array(m.intercept) ++ m.weights.toArray
+ }
+ }
+
+ def getModelCoefficients(model: PipelineModel): Array[Double] = {
+ model.stages.last match {
+ case m: LinearRegressionModel =>
+ Array(m.intercept) ++ m.coefficients.toArray
+ case m: LogisticRegressionModel =>
+ Array(m.intercept) ++ m.coefficients.toArray
}
}
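Both branches now return the same flat layout: the intercept first, then the feature coefficients. A sketch of unpacking that array (the numbers are borrowed from the binary logistic regression test below; `SparkRWrappers` itself is `private[r]`, so this only illustrates the layout):

```scala
// Array(intercept, coef_1, ..., coef_n), as built by getModelCoefficients.
val values = Array(2.8366423, -0.5895848, 0.8931147, -0.3925051, -0.7996864)
val intercept = values.head       // 2.8366423
val coefficients = values.tail    // the four feature coefficients
```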
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index ac2c3d825f..4dbbc7d399 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -200,17 +200,17 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
val numFeatures = dataset.select($(featuresCol)).take(1)(0).getAs[Vector](0).size
/*
- The weights vector has three parts:
+ The coefficients vector has three parts:
the first element: Double, log(sigma), the log of scale parameter
the second element: Double, intercept of the beta parameter
the third to the end elements: Doubles, regression coefficients vector of the beta parameter
*/
- val initialWeights = Vectors.zeros(numFeatures + 2)
+ val initialCoefficients = Vectors.zeros(numFeatures + 2)
val states = optimizer.iterations(new CachedDiffFunction(costFun),
- initialWeights.toBreeze.toDenseVector)
+ initialCoefficients.toBreeze.toDenseVector)
- val weights = {
+ val coefficients = {
val arrayBuilder = mutable.ArrayBuilder.make[Double]
var state: optimizer.State = null
while (states.hasNext) {
@@ -227,10 +227,10 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
if (handlePersistence) instances.unpersist()
- val coefficients = Vectors.dense(weights.slice(2, weights.length))
- val intercept = weights(1)
- val scale = math.exp(weights(0))
- val model = new AFTSurvivalRegressionModel(uid, coefficients, intercept, scale)
+ val regressionCoefficients = Vectors.dense(coefficients.slice(2, coefficients.length))
+ val intercept = coefficients(1)
+ val scale = math.exp(coefficients(0))
+ val model = new AFTSurvivalRegressionModel(uid, regressionCoefficients, intercept, scale)
copyValues(model.setParent(this))
}
@@ -251,7 +251,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
@Since("1.6.0")
class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0") override val uid: String,
- @Since("1.6.0") val coefficients: Vector,
+ @Since("1.6.0") val regressionCoefficients: Vector,
@Since("1.6.0") val intercept: Double,
@Since("1.6.0") val scale: Double)
extends Model[AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams {
@@ -275,7 +275,7 @@ class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0")
def predictQuantiles(features: Vector): Vector = {
// scale parameter for the Weibull distribution of lifetime
- val lambda = math.exp(BLAS.dot(coefficients, features) + intercept)
+ val lambda = math.exp(BLAS.dot(regressionCoefficients, features) + intercept)
// shape parameter for the Weibull distribution of lifetime
val k = 1 / scale
val quantiles = $(quantileProbabilities).map {
@@ -286,7 +286,7 @@ class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0")
def predict(features: Vector): Double = {
- math.exp(BLAS.dot(coefficients, features) + intercept)
+ math.exp(BLAS.dot(regressionCoefficients, features) + intercept)
}
@Since("1.6.0")
@@ -309,7 +309,7 @@ class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0")
override def copy(extra: ParamMap): AFTSurvivalRegressionModel = {
- copyValues(new AFTSurvivalRegressionModel(uid, coefficients, intercept, scale), extra)
+ copyValues(new AFTSurvivalRegressionModel(uid, regressionCoefficients, intercept, scale), extra)
.setParent(parent)
}
}
@@ -369,17 +369,17 @@ class AFTSurvivalRegressionModel private[ml] (
* \frac{\partial (-\iota)}{\partial (\log\sigma)}=
* \sum_{i=1}^{n}[\delta_{i}+(\delta_{i}-e^{\epsilon_{i}})\epsilon_{i}]
* }}}
- * @param weights The log of scale parameter, the intercept and
+ * @param coefficients In three parts: the log of the scale parameter, the intercept, and
* regression coefficients corresponding to the features.
* @param fitIntercept Whether to fit an intercept term.
*/
-private class AFTAggregator(weights: BDV[Double], fitIntercept: Boolean)
+private class AFTAggregator(coefficients: BDV[Double], fitIntercept: Boolean)
extends Serializable {
// beta is the intercept and regression coefficients to the covariates
- private val beta = weights.slice(1, weights.length)
+ private val beta = coefficients.slice(1, coefficients.length)
// sigma is the scale parameter of the AFT model
- private val sigma = math.exp(weights(0))
+ private val sigma = math.exp(coefficients(0))
private var totalCnt: Long = 0L
private var lossSum = 0.0
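A standalone sketch of the packed layout described above and its decomposition in train(); the numeric values are placeholders:

```scala
import org.apache.spark.mllib.linalg.Vectors

// coefficients = [log(sigma), intercept, beta_1, ..., beta_k]
val packed = Vectors.dense(0.35, 1.9, -0.08, 0.07)  // placeholder values

val scale = math.exp(packed(0))  // sigma, the Weibull scale parameter
val intercept = packed(1)
val regressionCoefficients =
  Vectors.dense(packed.toArray.slice(2, packed.size))

// predictQuantiles then uses a Weibull lifetime with k = 1 / scale and
// lambda = exp(dot(regressionCoefficients, features) + intercept);
// the p-quantile is lambda * (-ln(1 - p))^(1 / k).
```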
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index 2ff500f291..f4a17c8f9a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -87,8 +87,8 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
lit(1.0)
}
dataset.select(col($(labelCol)), f, w)
- .map { case Row(label: Double, feature: Double, weights: Double) =>
- (label, feature, weights)
+ .map { case Row(label: Double, feature: Double, weight: Double) =>
+ (label, feature, weight)
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index f663b9bd9a..6e9c7442b8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -203,7 +203,7 @@ class LinearRegression(override val uid: String)
val yMean = ySummarizer.mean(0)
val yStd = math.sqrt(ySummarizer.variance(0))
- // If the yStd is zero, then the intercept is yMean with zero weights;
+ // If the yStd is zero, then the intercept is yMean with zero coefficient;
// as a result, training is not needed.
if (yStd == 0.0) {
logWarning(s"The standard deviation of the label is zero, so the coefficients will be " +
@@ -331,14 +331,17 @@ class LinearRegression(override val uid: String)
@Experimental
class LinearRegressionModel private[ml] (
override val uid: String,
- val weights: Vector,
+ val coefficients: Vector,
val intercept: Double)
extends RegressionModel[Vector, LinearRegressionModel]
with LinearRegressionParams {
private var trainingSummary: Option[LinearRegressionTrainingSummary] = None
- override val numFeatures: Int = weights.size
+ @deprecated("Use coefficients instead.", "1.6.0")
+ def weights: Vector = coefficients
+
+ override val numFeatures: Int = coefficients.size
/**
* Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is
@@ -387,11 +390,11 @@ class LinearRegressionModel private[ml] (
override protected def predict(features: Vector): Double = {
- dot(features, weights) + intercept
+ dot(features, coefficients) + intercept
}
override def copy(extra: ParamMap): LinearRegressionModel = {
- val newModel = copyValues(new LinearRegressionModel(uid, weights, intercept), extra)
+ val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra)
if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
newModel.setParent(parent)
}
@@ -400,7 +403,7 @@ class LinearRegressionModel private[ml] (
/**
* :: Experimental ::
* Linear regression training results. Currently, the training summary ignores the
- * training weights except for the objective trace.
+ * training coefficients except for the objective trace.
* @param predictions predictions outputted by the model's `transform` method.
* @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
*/
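A sketch of the prediction arithmetic with the renamed field, mirroring `predict` above; the intercept and coefficients come from the glmnet fit quoted in LinearRegressionSuite, while the feature vector is an assumption:

```scala
import org.apache.spark.mllib.linalg.Vectors

val coefficients = Vectors.dense(4.700706, 7.199082)  // from the R fit in the tests
val intercept = 6.298698
val features = Vectors.dense(1.0, 2.0)                // assumed input

// predict(features) = dot(features, coefficients) + intercept
val prediction =
  features(0) * coefficients(0) + features(1) * coefficients(1) + intercept
```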
diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
index 253cabf013..cbabafe1b5 100644
--- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java
@@ -47,16 +47,16 @@ public class JavaOneVsRestSuite implements Serializable {
jsql = new SQLContext(jsc);
int nPoints = 3;
- // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2.
+ // The following coefficients and xMean/xVariance are computed from iris dataset with lambda=0.2.
// As a result, we are drawing samples from probability distribution of an actual model.
- double[] weights = {
+ double[] coefficients = {
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682 };
double[] xMean = {5.843, 3.057, 3.758, 1.199};
double[] xVariance = {0.6856, 0.1899, 3.116, 0.581};
List<LabeledPoint> points = JavaConverters.seqAsJavaListConverter(
- generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
).asJava();
datasetRDD = jsc.parallelize(points, 2);
dataset = jsql.createDataFrame(datasetRDD, LabeledPoint.class);
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index e0a795e5e0..325faf37e8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -48,21 +48,22 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
import org.apache.spark.mllib.classification.LogisticRegressionSuite
val nPoints = 10000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
val data = sc.parallelize(LogisticRegressionSuite.generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 1)
+ coefficients, xMean, xVariance, true, nPoints, 42), 1)
data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1) + ", "
+ x.features(2) + ", " + x.features(3)).saveAsTextFile("path")
*/
binaryDataset = {
val nPoints = 10000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
- val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ val testData =
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
sqlContext.createDataFrame(sc.parallelize(testData, 4))
}
@@ -296,8 +297,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -308,14 +309,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7996864
*/
val interceptR = 2.8366423
- val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
+ val coefficientsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
// Without regularization, with or without standardization will converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
}
test("binary logistic regression without intercept without regularization") {
@@ -332,9 +333,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights =
+ coefficients =
coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0, intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -345,14 +346,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.7407946
*/
val interceptR = 0.0
- val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
+ val coefficientsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-2)
// Without regularization, with or without standardization should converge to the same solution.
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-2)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-2)
}
test("binary logistic regression with intercept with L1 regularization") {
@@ -371,8 +372,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -383,10 +384,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.02481551
*/
val interceptR1 = -0.05627428
- val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
+ val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
assert(model1.intercept ~== interceptR1 relTol 1E-2)
- assert(model1.weights ~= weightsR1 absTol 2E-2)
+ assert(model1.coefficients ~= coefficientsR1 absTol 2E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -395,9 +396,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -408,10 +409,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.3722152
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
assert(model2.intercept ~== interceptR2 relTol 1E-2)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with L1 regularization") {
@@ -430,9 +431,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -443,10 +444,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.03891782
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
+ val coefficientsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 absTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 absTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -455,9 +456,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -468,10 +469,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with L2 regularization") {
@@ -490,8 +491,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -502,10 +503,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.10062872
*/
val interceptR1 = 0.15021751
- val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
+ val coefficientsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -514,9 +515,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -527,10 +528,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.06266838
*/
val interceptR2 = 0.48657516
- val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
+ val coefficientsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
}
test("binary logistic regression without intercept with L2 regularization") {
@@ -549,9 +550,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -562,10 +563,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.09799775
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
+ val coefficientsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -574,9 +575,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -587,10 +588,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.053314311
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
+ val coefficientsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-2)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)
}
test("binary logistic regression with intercept with ElasticNet regularization") {
@@ -609,8 +610,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -621,10 +622,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.15458796
*/
val interceptR1 = 0.57734851
- val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
+ val coefficientsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
assert(model1.intercept ~== interceptR1 relTol 6E-3)
- assert(model1.weights ~== weightsR1 absTol 5E-3)
+ assert(model1.coefficients ~== coefficientsR1 absTol 5E-3)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -633,9 +634,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -646,10 +647,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.05350074
*/
val interceptR2 = 0.51555993
- val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
+ val coefficientsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
assert(model2.intercept ~== interceptR2 relTol 6E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression without intercept with ElasticNet regularization") {
@@ -668,9 +669,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
intercept=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -681,10 +682,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 -0.142534158
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
+ val coefficientsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 absTol 1E-2)
+ assert(model1.coefficients ~= coefficientsR1 absTol 1E-2)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -693,9 +694,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21,
intercept=FALSE, standardize=FALSE))
- weights
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -706,10 +707,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
+ val coefficientsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 absTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 absTol 1E-3)
}
test("binary logistic regression with intercept with strong L1 regularization") {
@@ -732,8 +733,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}).histogram
/*
- For binary logistic regression with strong L1 regularization, all the weights will be zeros.
- As a result,
+ For binary logistic regression with strong L1 regularization, all the coefficients
+ will be zeros. As a result,
{{{
P(0) = 1 / (1 + \exp(b)), and
P(1) = \exp(b) / (1 + \exp(b))
@@ -743,13 +744,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
}}}
*/
val interceptTheory = math.log(histogram(1) / histogram(0))
- val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+ val coefficientsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptTheory relTol 1E-5)
- assert(model1.weights ~= weightsTheory absTol 1E-6)
+ assert(model1.coefficients ~= coefficientsTheory absTol 1E-6)
assert(model2.intercept ~== interceptTheory relTol 1E-5)
- assert(model2.weights ~= weightsTheory absTol 1E-6)
+ assert(model2.coefficients ~= coefficientsTheory absTol 1E-6)
/*
Using the following R code to load the data and train the model using glmnet package.
@@ -758,8 +759,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE)
label = factor(data$V1)
features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
- weights = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
- weights
+ coefficients = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0))
+ coefficients
5 x 1 sparse Matrix of class "dgCMatrix"
s0
@@ -770,10 +771,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data.V5 .
*/
val interceptR = -0.248065
- val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
+ val coefficientsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
assert(model1.intercept ~== interceptR relTol 1E-5)
- assert(model1.weights ~== weightsR absTol 1E-6)
+ assert(model1.coefficients ~== coefficientsR absTol 1E-6)
}
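The theoretical intercept used in this test is simply the empirical log-odds of the labels; a minimal sketch with placeholder counts:

```scala
import org.apache.spark.mllib.linalg.Vectors

// Strong L1 drives every coefficient to zero, so P(1)/P(0) = exp(b)
// and the fitted intercept is the log of the label ratio.
val histogram = Array(600.0, 400.0)  // placeholder counts of labels 0 and 1
val interceptTheory = math.log(histogram(1) / histogram(0))
val coefficientsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
```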
test("evaluate on test set") {
@@ -814,10 +815,11 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
test("binary logistic regression with weighted samples") {
val (dataset, weightedDataset) = {
val nPoints = 1000
- val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
+ val coefficients = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
- val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)
+ val testData =
+ generateMultinomialLogisticInput(coefficients, xMean, xVariance, true, nPoints, 42)
// Let's over-sample the positive samples twice.
val data1 = testData.flatMap { case labeledPoint: LabeledPoint =>
@@ -863,9 +865,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model1a0 = trainer1a.fit(dataset)
val model1a1 = trainer1a.fit(weightedDataset)
val model1b = trainer1b.fit(weightedDataset)
- assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+ assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
- assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+ assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
index 2d1df9b2b8..17db8c4477 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
@@ -53,16 +53,16 @@ class MultilayerPerceptronClassifierSuite extends SparkFunSuite with MLlibTestSp
test("3 class classification with 2 hidden layers") {
val nPoints = 1000
- // The following weights are taken from OneVsRestSuite.scala
+ // The following coefficients are taken from OneVsRestSuite.scala
// they represent 3-class iris dataset
- val weights = Array(
+ val coefficients = Array(
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
val rdd = sc.parallelize(generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 2)
+ coefficients, xMean, xVariance, true, nPoints, 42), 2)
val dataFrame = sqlContext.createDataFrame(rdd).toDF("label", "features")
val numClasses = 3
val numIterations = 100
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 977f0e0b70..5ea71c5317 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -43,16 +43,16 @@ class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext {
val nPoints = 1000
- // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2.
+ // The following coefficients and xMean/xVariance are computed from iris dataset with lambda=0.2
// As a result, we are drawing samples from probability distribution of an actual model.
- val weights = Array(
+ val coefficients = Array(
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
rdd = sc.parallelize(generateMultinomialLogisticInput(
- weights, xMean, xVariance, true, nPoints, 42), 2)
+ coefficients, xMean, xVariance, true, nPoints, 42), 2)
dataset = sqlContext.createDataFrame(rdd)
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
index 359f310271..c0f791bce1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala
@@ -141,12 +141,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 5
n= 1000
*/
- val coefficientsR = Vectors.dense(-0.039)
+ val regressionCoefficientsR = Vectors.dense(-0.039)
val interceptR = 1.759
val scaleR = 1.41
assert(model.intercept ~== interceptR relTol 1E-3)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
@@ -212,12 +212,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 5
n= 1000
*/
- val coefficientsR = Vectors.dense(-0.0844, 0.0677)
+ val regressionCoefficientsR = Vectors.dense(-0.0844, 0.0677)
val interceptR = 1.9206
val scaleR = 0.977
assert(model.intercept ~== interceptR relTol 1E-3)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
@@ -282,12 +282,12 @@ class AFTSurvivalRegressionSuite extends SparkFunSuite with MLlibTestSparkContex
Number of Newton-Raphson Iterations: 6
n= 1000
*/
- val coefficientsR = Vectors.dense(0.896, -0.709)
+ val regressionCoefficientsR = Vectors.dense(0.896, -0.709)
val interceptR = 0.0
val scaleR = 1.52
assert(model.intercept === interceptR)
- assert(model.coefficients ~== coefficientsR relTol 1E-3)
+ assert(model.regressionCoefficients ~== regressionCoefficientsR relTol 1E-3)
assert(model.scale ~== scaleR relTol 1E-3)
/*
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index a2a5c0bbdc..235c796d78 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -122,8 +122,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
label <- as.numeric(data$V1)
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.298698
@@ -131,17 +131,18 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 7.199082
*/
val interceptR = 6.298698
- val weightsR = Vectors.dense(4.700706, 7.199082)
+ val coefficientsR = Vectors.dense(4.700706, 7.199082)
assert(model1.intercept ~== interceptR relTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
assert(model2.intercept ~== interceptR relTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -159,37 +160,37 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val modelWithoutIntercept2 = trainer2.fit(datasetWithDenseFeatureWithoutIntercept)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
intercept = FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data.V2. 6.995908
as.numeric.data.V3. 5.275131
*/
- val weightsR = Vectors.dense(6.995908, 5.275131)
+ val coefficientsR = Vectors.dense(6.995908, 5.275131)
assert(model1.intercept ~== 0 absTol 1E-3)
- assert(model1.weights ~= weightsR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
assert(model2.intercept ~== 0 absTol 1E-3)
- assert(model2.weights ~= weightsR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
/*
Then again with the data with no intercept:
- > weightsWithoutIntercept
+ > coefficientsWithoutIntercept
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
as.numeric.data3.V2. 4.70011
as.numeric.data3.V3. 7.19943
*/
- val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
+ val coefficientsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
- assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR relTol 1E-3)
+ assert(modelWithoutIntercept1.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3)
assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
- assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR relTol 1E-3)
+ assert(modelWithoutIntercept2.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3)
}
}
@@ -211,8 +212,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian",
+ alpha = 1.0, lambda = 0.57 ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.24300
@@ -220,14 +222,14 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.679841
*/
val interceptR1 = 6.24300
- val weightsR1 = Vectors.dense(4.024821, 6.679841)
+ val coefficientsR1 = Vectors.dense(4.024821, 6.679841)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.416948
@@ -235,16 +237,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.724286
*/
val interceptR2 = 6.416948
- val weightsR2 = Vectors.dense(3.893869, 6.724286)
+ val coefficientsR2 = Vectors.dense(3.893869, 6.724286)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -269,9 +272,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- intercept=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, intercept=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -279,15 +282,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.772913
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(6.299752, 4.772913)
+ val coefficientsR1 = Vectors.dense(6.299752, 4.772913)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
- intercept=FALSE, standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
+ lambda = 0.57, intercept=FALSE, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -295,16 +298,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.764229
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(6.232193, 4.764229)
+ val coefficientsR2 = Vectors.dense(6.232193, 4.764229)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -321,8 +325,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 5.269376
@@ -330,15 +334,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.712356
*/
val interceptR1 = 5.269376
- val weightsR1 = Vectors.dense(3.736216, 5.712356)
+ val coefficientsR1 = Vectors.dense(3.736216, 5.712356)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 5.791109
@@ -346,15 +350,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.910406
*/
val interceptR2 = 5.791109
- val weightsR2 = Vectors.dense(3.435466, 5.910406)
+ val coefficientsR2 = Vectors.dense(3.435466, 5.910406)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -370,9 +375,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
intercept = FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -380,15 +385,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.214502
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(5.522875, 4.214502)
+ val coefficientsR1 = Vectors.dense(5.522875, 4.214502)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
intercept = FALSE, standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -396,15 +401,16 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.187419
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(5.263704, 4.187419)
+ val coefficientsR2 = Vectors.dense(5.263704, 4.187419)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
val prediction2 =
- features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -428,8 +434,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6 ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.324108
@@ -437,15 +444,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 5.200403
*/
val interceptR1 = 5.696056
- val weightsR1 = Vectors.dense(3.670489, 6.001122)
+ val coefficientsR1 = Vectors.dense(3.670489, 6.001122)
assert(model1.intercept ~== interceptR1 relTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
standardize=FALSE))
- > weights
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.114723
@@ -453,16 +460,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 6.146531
*/
val interceptR2 = 6.114723
- val weightsR2 = Vectors.dense(3.409937, 6.146531)
+ val coefficientsR2 = Vectors.dense(3.409937, 6.146531)
assert(model2.intercept ~== interceptR2 relTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -487,9 +495,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2 = trainer2.fit(datasetWithDenseFeature)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
- intercept=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6, intercept=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -497,15 +505,15 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.dataM.V3. 4.322251
*/
val interceptR1 = 0.0
- val weightsR1 = Vectors.dense(5.673348, 4.322251)
+ val coefficientsR1 = Vectors.dense(5.673348, 4.322251)
assert(model1.intercept ~== interceptR1 absTol 1E-3)
- assert(model1.weights ~= weightsR1 relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR1 relTol 1E-3)
/*
- weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
- intercept=FALSE, standardize=FALSE))
- > weights
+ coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
+ lambda = 1.6, intercept=FALSE, standardize=FALSE ))
+ > coefficients
3 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) .
@@ -513,16 +521,17 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
as.numeric.data.V3. 4.297622
*/
val interceptR2 = 0.0
- val weightsR2 = Vectors.dense(5.477988, 4.297622)
+ val coefficientsR2 = Vectors.dense(5.477988, 4.297622)
assert(model2.intercept ~== interceptR2 absTol 1E-3)
- assert(model2.weights ~= weightsR2 relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)
model1.transform(datasetWithDenseFeature).select("features", "prediction")
.collect().foreach {
case Row(features: DenseVector, prediction1: Double) =>
- val prediction2 = features(0) * model1.weights(0) + features(1) * model1.weights(1) +
- model1.intercept
+ val prediction2 =
+ features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
+ model1.intercept
assert(prediction1 ~== prediction2 relTol 1E-5)
}
}
@@ -554,7 +563,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val expectedResiduals = datasetWithDenseFeature.select("features", "label")
.map { case Row(features: DenseVector, label: Double) =>
val prediction =
- features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept
+ features(0) * model.coefficients(0) + features(1) * model.coefficients(1) +
+ model.intercept
label - prediction
}
.zip(model.summary.residuals.map(_.getDouble(0)))
@@ -663,9 +673,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model1a1 = trainer1a.fit(weightedData)
val model1b = trainer1b.fit(weightedData)
- assert(model1a0.weights !~= model1a1.weights absTol 1E-3)
+ assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
- assert(model1a0.weights ~== model1b.weights absTol 1E-3)
+ assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)
val trainer2a = (new LinearRegression).setFitIntercept(true)
@@ -675,9 +685,9 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model2a0 = trainer2a.fit(data)
val model2a1 = trainer2a.fit(weightedData)
val model2b = trainer2b.fit(weightedData)
- assert(model2a0.weights !~= model2a1.weights absTol 1E-3)
+ assert(model2a0.coefficients !~= model2a1.coefficients absTol 1E-3)
assert(model2a0.intercept !~= model2a1.intercept absTol 1E-3)
- assert(model2a0.weights ~== model2b.weights absTol 1E-3)
+ assert(model2a0.coefficients ~== model2b.coefficients absTol 1E-3)
assert(model2a0.intercept ~== model2b.intercept absTol 1E-3)
val trainer3a = (new LinearRegression).setFitIntercept(false)
@@ -687,8 +697,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model3a0 = trainer3a.fit(data)
val model3a1 = trainer3a.fit(weightedData)
val model3b = trainer3b.fit(weightedData)
- assert(model3a0.weights !~= model3a1.weights absTol 1E-3)
- assert(model3a0.weights ~== model3b.weights absTol 1E-3)
+ assert(model3a0.coefficients !~= model3a1.coefficients absTol 1E-3)
+ assert(model3a0.coefficients ~== model3b.coefficients absTol 1E-3)
val trainer4a = (new LinearRegression).setFitIntercept(false)
.setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
@@ -697,8 +707,8 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
val model4a0 = trainer4a.fit(data)
val model4a1 = trainer4a.fit(weightedData)
val model4b = trainer4b.fit(weightedData)
- assert(model4a0.weights !~= model4a1.weights absTol 1E-3)
- assert(model4a0.weights ~== model4b.weights absTol 1E-3)
+ assert(model4a0.coefficients !~= model4a1.coefficients absTol 1E-3)
+ assert(model4a0.coefficients ~== model4b.coefficients absTol 1E-3)
}
}