/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.ml.regression

import scala.util.Random

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._
import org.apache.spark.sql.{DataFrame, Row}

class LinearRegressionSuite
  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {

  private val seed: Int = 42
  @transient var datasetWithDenseFeature: DataFrame = _
  @transient var datasetWithDenseFeatureWithoutIntercept: DataFrame = _
  @transient var datasetWithSparseFeature: DataFrame = _
  @transient var datasetWithWeight: DataFrame = _
  @transient var datasetWithWeightConstantLabel: DataFrame = _
  @transient var datasetWithWeightZeroLabel: DataFrame = _

  /*
     In `LinearRegressionSuite`, we will make sure that the model trained by SparkML
     is the same as the one trained by R's glmnet package. The following instruction
     describes how to reproduce the data in R.
     In a spark-shell, use the following code:

     import org.apache.spark.mllib.util.LinearDataGenerator
     val data =
       sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, Array(4.7, 7.2),
         Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2)
     data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1)).coalesce(1)
       .saveAsTextFile("path")
   */
  override def beforeAll(): Unit = {
    super.beforeAll()
    datasetWithDenseFeature = sqlContext.createDataFrame(
      sc.parallelize(LinearDataGenerator.generateLinearInput(
        intercept = 6.3, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3),
        xVariance = Array(0.7, 1.2), nPoints = 10000, seed, eps = 0.1), 2))
    /*
       datasetWithoutIntercept is not needed for correctness testing but is useful for illustrating
       training model without intercept
     */
    datasetWithDenseFeatureWithoutIntercept = sqlContext.createDataFrame(
      sc.parallelize(LinearDataGenerator.generateLinearInput(
        intercept = 0.0, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3),
        xVariance = Array(0.7, 1.2), nPoints = 10000, seed, eps = 0.1), 2))

    val r = new Random(seed)
    // When feature size is larger than 4096, normal optimizer is choosed
    // as the solver of linear regression in the case of "auto" mode.
    val featureSize = 4100
    datasetWithSparseFeature = sqlContext.createDataFrame(
      sc.parallelize(LinearDataGenerator.generateLinearInput(
        intercept = 0.0, weights = Seq.fill(featureSize)(r.nextDouble).toArray,
        xMean = Seq.fill(featureSize)(r.nextDouble).toArray,
        xVariance = Seq.fill(featureSize)(r.nextDouble).toArray, nPoints = 200,
        seed, eps = 0.1, sparsity = 0.7), 2))

    /*
       R code:

       A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2)
       b <- c(17, 19, 23, 29)
       w <- c(1, 2, 3, 4)
       df <- as.data.frame(cbind(A, b))
     */
    datasetWithWeight = sqlContext.createDataFrame(
      sc.parallelize(Seq(
        Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse),
        Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)),
        Instance(23.0, 3.0, Vectors.dense(2.0, 11.0)),
        Instance(29.0, 4.0, Vectors.dense(3.0, 13.0))
      ), 2))

    /*
       R code:

       A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2)
       b.const <- c(17, 17, 17, 17)
       w <- c(1, 2, 3, 4)
       df.const.label <- as.data.frame(cbind(A, b.const))
     */
    datasetWithWeightConstantLabel = sqlContext.createDataFrame(
      sc.parallelize(Seq(
        Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse),
        Instance(17.0, 2.0, Vectors.dense(1.0, 7.0)),
        Instance(17.0, 3.0, Vectors.dense(2.0, 11.0)),
        Instance(17.0, 4.0, Vectors.dense(3.0, 13.0))
      ), 2))
    datasetWithWeightZeroLabel = sqlContext.createDataFrame(
      sc.parallelize(Seq(
        Instance(0.0, 1.0, Vectors.dense(0.0, 5.0).toSparse),
        Instance(0.0, 2.0, Vectors.dense(1.0, 7.0)),
        Instance(0.0, 3.0, Vectors.dense(2.0, 11.0)),
        Instance(0.0, 4.0, Vectors.dense(3.0, 13.0))
      ), 2))
  }

  test("params") {
    ParamsSuite.checkParams(new LinearRegression)
    val model = new LinearRegressionModel("linearReg", Vectors.dense(0.0), 0.0)
    ParamsSuite.checkParams(model)
  }

  test("linear regression: default params") {
    val lir = new LinearRegression
    assert(lir.getLabelCol === "label")
    assert(lir.getFeaturesCol === "features")
    assert(lir.getPredictionCol === "prediction")
    assert(lir.getRegParam === 0.0)
    assert(lir.getElasticNetParam === 0.0)
    assert(lir.getFitIntercept)
    assert(lir.getStandardization)
    assert(lir.getSolver == "auto")
    val model = lir.fit(datasetWithDenseFeature)

    // copied model must have the same parent.
    MLTestingUtils.checkCopy(model)

    model.transform(datasetWithDenseFeature)
      .select("label", "prediction")
      .collect()
    assert(model.getFeaturesCol === "features")
    assert(model.getPredictionCol === "prediction")
    assert(model.intercept !== 0.0)
    assert(model.hasParent)
    val numFeatures = datasetWithDenseFeature.select("features").first().getAs[Vector](0).size
    assert(model.numFeatures === numFeatures)
  }

  test("linear regression with intercept without regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = new LinearRegression().setSolver(solver)
      // The result should be the same regardless of standardization without regularization
      val trainer2 = (new LinearRegression).setStandardization(false).setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val model2 = trainer2.fit(datasetWithDenseFeature)

      /*
         Using the following R code to load the data and train the model using glmnet package.

         library("glmnet")
         data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
         features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
         label <- as.numeric(data$V1)
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)         6.298698
         as.numeric.data.V2. 4.700706
         as.numeric.data.V3. 7.199082
       */
      val interceptR = 6.298698
      val coefficientsR = Vectors.dense(4.700706, 7.199082)

      assert(model1.intercept ~== interceptR relTol 1E-3)
      assert(model1.coefficients ~= coefficientsR relTol 1E-3)
      assert(model2.intercept ~== interceptR relTol 1E-3)
      assert(model2.coefficients ~= coefficientsR relTol 1E-3)

      model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
        case Row(features: DenseVector, prediction1: Double) =>
          val prediction2 =
            features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
              model1.intercept
          assert(prediction1 ~== prediction2 relTol 1E-5)
      }
    }
  }

  test("linear regression without intercept without regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setFitIntercept(false).setSolver(solver)
      // Without regularization the results should be the same
      val trainer2 = (new LinearRegression).setFitIntercept(false).setStandardization(false)
        .setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val modelWithoutIntercept1 = trainer1.fit(datasetWithDenseFeatureWithoutIntercept)
      val model2 = trainer2.fit(datasetWithDenseFeature)
      val modelWithoutIntercept2 = trainer2.fit(datasetWithDenseFeatureWithoutIntercept)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
           intercept = FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)         .
         as.numeric.data.V2. 6.973403
         as.numeric.data.V3. 5.284370
       */
      val coefficientsR = Vectors.dense(6.973403, 5.284370)

      assert(model1.intercept ~== 0 absTol 1E-2)
      assert(model1.coefficients ~= coefficientsR relTol 1E-2)
      assert(model2.intercept ~== 0 absTol 1E-2)
      assert(model2.coefficients ~= coefficientsR relTol 1E-2)

      /*
         Then again with the data with no intercept:
         > coefficientsWithourIntercept
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)           .
         as.numeric.data3.V2. 4.70011
         as.numeric.data3.V3. 7.19943
       */
      val coefficientsWithourInterceptR = Vectors.dense(4.70011, 7.19943)

      assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
      assert(modelWithoutIntercept1.coefficients ~= coefficientsWithourInterceptR relTol 1E-3)
      assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
      assert(modelWithoutIntercept2.coefficients ~= coefficientsWithourInterceptR relTol 1E-3)
    }
  }

  test("linear regression with intercept with L1 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setSolver(solver).setStandardization(false)

      // Normal optimizer is not supported with only L1 regularization case.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian",
             alpha = 1.0, lambda = 0.57 ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                    s0
           (Intercept)       6.242284
           as.numeric.d1.V2. 4.019605
           as.numeric.d1.V3. 6.679538
         */
        val interceptR1 = 6.242284
        val coefficientsR1 = Vectors.dense(4.019605, 6.679538)
        assert(model1.intercept ~== interceptR1 relTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
             lambda = 0.57, standardize=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                    s0
           (Intercept)         6.416948
           as.numeric.data.V2. 3.893869
           as.numeric.data.V3. 6.724286
         */
        val interceptR2 = 6.416948
        val coefficientsR2 = Vectors.dense(3.893869, 6.724286)

        assert(model2.intercept ~== interceptR2 relTol 1E-3)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
            case Row(features: DenseVector, prediction1: Double) =>
              val prediction2 =
                features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                  model1.intercept
              assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression without intercept with L1 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setFitIntercept(false).setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setFitIntercept(false).setStandardization(false).setSolver(solver)

      // Normal optimizer is not supported with only L1 regularization case.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
             lambda = 0.57, intercept=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)          .
           as.numeric.data.V2. 6.272927
           as.numeric.data.V3. 4.782604
         */
        val interceptR1 = 0.0
        val coefficientsR1 = Vectors.dense(6.272927, 4.782604)

        assert(model1.intercept ~== interceptR1 absTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
             lambda = 0.57, intercept=FALSE, standardize=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)         .
           as.numeric.data.V2. 6.207817
           as.numeric.data.V3. 4.775780
         */
        val interceptR2 = 0.0
        val coefficientsR2 = Vectors.dense(6.207817, 4.775780)

        assert(model2.intercept ~== interceptR2 absTol 1E-2)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
            case Row(features: DenseVector, prediction1: Double) =>
              val prediction2 =
                features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                  model1.intercept
              assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression with intercept with L2 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setStandardization(false).setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val model2 = trainer2.fit(datasetWithDenseFeature)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)       5.260103
         as.numeric.d1.V2. 3.725522
         as.numeric.d1.V3. 5.711203
       */
      val interceptR1 = 5.260103
      val coefficientsR1 = Vectors.dense(3.725522, 5.711203)

      assert(model1.intercept ~== interceptR1 relTol 1E-2)
      assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
           standardize=FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)       5.790885
         as.numeric.d1.V2. 3.432373
         as.numeric.d1.V3. 5.919196
       */
      val interceptR2 = 5.790885
      val coefficientsR2 = Vectors.dense(3.432373, 5.919196)

      assert(model2.intercept ~== interceptR2 relTol 1E-2)
      assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

      model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
        case Row(features: DenseVector, prediction1: Double) =>
          val prediction2 =
            features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
              model1.intercept
          assert(prediction1 ~== prediction2 relTol 1E-5)
      }
    }
  }

  test("linear regression without intercept with L2 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setFitIntercept(false).setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setFitIntercept(false).setStandardization(false).setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val model2 = trainer2.fit(datasetWithDenseFeature)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
           intercept = FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)       .
         as.numeric.d1.V2. 5.493430
         as.numeric.d1.V3. 4.223082
       */
      val interceptR1 = 0.0
      val coefficientsR1 = Vectors.dense(5.493430, 4.223082)

      assert(model1.intercept ~== interceptR1 absTol 1E-2)
      assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
           intercept = FALSE, standardize=FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)         .
         as.numeric.d1.V2. 5.244324
         as.numeric.d1.V3. 4.203106
       */
      val interceptR2 = 0.0
      val coefficientsR2 = Vectors.dense(5.244324, 4.203106)

      assert(model2.intercept ~== interceptR2 absTol 1E-2)
      assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

      model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
        case Row(features: DenseVector, prediction1: Double) =>
          val prediction2 =
            features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
              model1.intercept
          assert(prediction1 ~== prediction2 relTol 1E-5)
      }
    }
  }

  test("linear regression with intercept with ElasticNet regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setStandardization(false).setSolver(solver)

      // Normal optimizer is not supported with non-zero elasticnet parameter.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
             lambda = 1.6 ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)       5.689855
           as.numeric.d1.V2. 3.661181
           as.numeric.d1.V3. 6.000274
         */
        val interceptR1 = 5.689855
        val coefficientsR1 = Vectors.dense(3.661181, 6.000274)

        assert(model1.intercept ~== interceptR1 relTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
             standardize=FALSE))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)       6.113890
           as.numeric.d1.V2. 3.407021
           as.numeric.d1.V3. 6.152512
         */
        val interceptR2 = 6.113890
        val coefficientsR2 = Vectors.dense(3.407021, 6.152512)

        assert(model2.intercept ~== interceptR2 relTol 1E-2)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
          case Row(features: DenseVector, prediction1: Double) =>
            val prediction2 =
              features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                model1.intercept
            assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression without intercept with ElasticNet regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setFitIntercept(false).setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setFitIntercept(false).setStandardization(false).setSolver(solver)

      // Normal optimizer is not supported with non-zero elasticnet parameter.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
             lambda = 1.6, intercept=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                      s0
           (Intercept)       .
           as.numeric.d1.V2. 5.643748
           as.numeric.d1.V3. 4.331519
         */
        val interceptR1 = 0.0
        val coefficientsR1 = Vectors.dense(5.643748, 4.331519)

        assert(model1.intercept ~== interceptR1 absTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
             lambda = 1.6, intercept=FALSE, standardize=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)         .
           as.numeric.d1.V2. 5.455902
           as.numeric.d1.V3. 4.312266

         */
        val interceptR2 = 0.0
        val coefficientsR2 = Vectors.dense(5.455902, 4.312266)

        assert(model2.intercept ~== interceptR2 absTol 1E-2)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
          case Row(features: DenseVector, prediction1: Double) =>
            val prediction2 =
              features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                model1.intercept
            assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression model with constant label") {
    /*
       R code:
       for (formula in c(b.const ~ . -1, b.const ~ .)) {
         model <- lm(formula, data=df.const.label, weights=w)
         print(as.vector(coef(model)))
       }
      [1] -9.221298  3.394343
      [1] 17  0  0
    */
    val expected = Seq(
      Vectors.dense(0.0, -9.221298, 3.394343),
      Vectors.dense(17.0, 0.0, 0.0))

    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      var idx = 0
      for (fitIntercept <- Seq(false, true)) {
        val model1 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setWeightCol("weight")
          .setSolver(solver)
          .fit(datasetWithWeightConstantLabel)
        val actual1 = Vectors.dense(model1.intercept, model1.coefficients(0),
            model1.coefficients(1))
        assert(actual1 ~== expected(idx) absTol 1e-4)

        val model2 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setWeightCol("weight")
          .setSolver(solver)
          .fit(datasetWithWeightZeroLabel)
        val actual2 = Vectors.dense(model2.intercept, model2.coefficients(0),
            model2.coefficients(1))
        assert(actual2 ~==  Vectors.dense(0.0, 0.0, 0.0) absTol 1e-4)
        idx += 1
      }
    }
  }

  test("regularized linear regression through origin with constant label") {
    // The problem is ill-defined if fitIntercept=false, regParam is non-zero.
    // An exception is thrown in this case.
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      for (standardization <- Seq(false, true)) {
        val model = new LinearRegression().setFitIntercept(false)
          .setRegParam(0.1).setStandardization(standardization).setSolver(solver)
        intercept[IllegalArgumentException] {
          model.fit(datasetWithWeightConstantLabel)
        }
      }
    }
  }

  test("linear regression with l-bfgs when training is not needed") {
    // When label is constant, l-bfgs solver returns results without training.
    // There are two possibilities: If the label is non-zero but constant,
    // and fitIntercept is true, then the model return yMean as intercept without training.
    // If label is all zeros, then all coefficients are zero regardless of fitIntercept, so
    // no training is needed.
    for (fitIntercept <- Seq(false, true)) {
      for (standardization <- Seq(false, true)) {
        val model1 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setStandardization(standardization)
          .setWeightCol("weight")
          .setSolver("l-bfgs")
          .fit(datasetWithWeightConstantLabel)
        if (fitIntercept) {
          assert(model1.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
        }
        val model2 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setWeightCol("weight")
          .setSolver("l-bfgs")
          .fit(datasetWithWeightZeroLabel)
        assert(model2.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
      }
    }
  }

  test("linear regression model training summary") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer = new LinearRegression().setSolver(solver)
      val model = trainer.fit(datasetWithDenseFeature)
      val trainerNoPredictionCol = trainer.setPredictionCol("")
      val modelNoPredictionCol = trainerNoPredictionCol.fit(datasetWithDenseFeature)

      // Training results for the model should be available
      assert(model.hasSummary)
      assert(modelNoPredictionCol.hasSummary)

      // Schema should be a superset of the input dataset
      assert((datasetWithDenseFeature.schema.fieldNames.toSet + "prediction").subsetOf(
        model.summary.predictions.schema.fieldNames.toSet))
      // Validate that we re-insert a prediction column for evaluation
      val modelNoPredictionColFieldNames
      = modelNoPredictionCol.summary.predictions.schema.fieldNames
      assert((datasetWithDenseFeature.schema.fieldNames.toSet).subsetOf(
        modelNoPredictionColFieldNames.toSet))
      assert(modelNoPredictionColFieldNames.exists(s => s.startsWith("prediction_")))

      // Residuals in [[LinearRegressionResults]] should equal those manually computed
      val expectedResiduals = datasetWithDenseFeature.select("features", "label")
        .map { case Row(features: DenseVector, label: Double) =>
        val prediction =
          features(0) * model.coefficients(0) + features(1) * model.coefficients(1) +
            model.intercept
        label - prediction
      }
        .zip(model.summary.residuals.map(_.getDouble(0)))
        .collect()
        .foreach { case (manualResidual: Double, resultResidual: Double) =>
        assert(manualResidual ~== resultResidual relTol 1E-5)
      }

      /*
         # Use the following R code to generate model training results.

         # path/part-00000 is the file generated by running LinearDataGenerator.generateLinearInput
         # as described before the beforeAll() method.
         d1 <- read.csv("path/part-00000", header=FALSE, stringsAsFactors=FALSE)
         fit <- glm(V1 ~ V2 + V3, data = d1, family = "gaussian")
         names(f1)[1] = c("V2")
         names(f1)[2] = c("V3")
         f1 <- data.frame(as.numeric(d1$V2), as.numeric(d1$V3))
         predictions <- predict(fit, newdata=f1)
         l1 <- as.numeric(d1$V1)

         residuals <- l1 - predictions
         > mean(residuals^2)           # MSE
         [1] 0.00985449
         > mean(abs(residuals))        # MAD
         [1] 0.07961668
         > cor(predictions, l1)^2   # r^2
         [1] 0.9998737

         > summary(fit)

          Call:
          glm(formula = V1 ~ V2 + V3, family = "gaussian", data = d1)

          Deviance Residuals:
               Min        1Q    Median        3Q       Max
          -0.47082  -0.06797   0.00002   0.06725   0.34635

          Coefficients:
                       Estimate Std. Error t value Pr(>|t|)
          (Intercept) 6.3022157  0.0018600    3388   <2e-16 ***
          V2          4.6982442  0.0011805    3980   <2e-16 ***
          V3          7.1994344  0.0009044    7961   <2e-16 ***
          ---

          ....
       */
      assert(model.summary.meanSquaredError ~== 0.00985449 relTol 1E-4)
      assert(model.summary.meanAbsoluteError ~== 0.07961668 relTol 1E-4)
      assert(model.summary.r2 ~== 0.9998737 relTol 1E-4)

      // Normal solver uses "WeightedLeastSquares". This algorithm does not generate
      // objective history because it does not run through iterations.
      if (solver == "l-bfgs") {
        // Objective function should be monotonically decreasing for linear regression
        assert(
          model.summary
            .objectiveHistory
            .sliding(2)
            .forall(x => x(0) >= x(1)))
      } else {
        // To clalify that the normal solver is used here.
        assert(model.summary.objectiveHistory.length == 1)
        assert(model.summary.objectiveHistory(0) == 0.0)
        val devianceResidualsR = Array(-0.47082, 0.34635)
        val seCoefR = Array(0.0011805, 0.0009044, 0.0018600)
        val tValsR = Array(3980, 7961, 3388)
        val pValsR = Array(0, 0, 0)
        model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x =>
          assert(x._1 ~== x._2 absTol 1E-4) }
        model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x =>
          assert(x._1 ~== x._2 absTol 1E-4) }
        model.summary.tValues.map(_.round).zip(tValsR).foreach{ x => assert(x._1 === x._2) }
        model.summary.pValues.map(_.round).zip(pValsR).foreach{ x => assert(x._1 === x._2) }
      }
    }
  }

  test("linear regression model testset evaluation summary") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer = new LinearRegression().setSolver(solver)
      val model = trainer.fit(datasetWithDenseFeature)

      // Evaluating on training dataset should yield results summary equal to training summary
      val testSummary = model.evaluate(datasetWithDenseFeature)
      assert(model.summary.meanSquaredError ~== testSummary.meanSquaredError relTol 1E-5)
      assert(model.summary.r2 ~== testSummary.r2 relTol 1E-5)
      model.summary.residuals.select("residuals").collect()
        .zip(testSummary.residuals.select("residuals").collect())
        .forall { case (Row(r1: Double), Row(r2: Double)) => r1 ~== r2 relTol 1E-5 }
    }
  }

  test("linear regression with weighted samples") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val (data, weightedData) = {
        val activeData = LinearDataGenerator.generateLinearInput(
          6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 500, 1, 0.1)

        val rnd = new Random(8392)
        val signedData = activeData.map { case p: LabeledPoint =>
          (rnd.nextGaussian() > 0.0, p)
        }

        val data1 = signedData.flatMap {
          case (true, p) => Iterator(p, p)
          case (false, p) => Iterator(p)
        }

        val weightedSignedData = signedData.flatMap {
          case (true, LabeledPoint(label, features)) =>
            Iterator(
              Instance(label, weight = 1.2, features),
              Instance(label, weight = 0.8, features)
            )
          case (false, LabeledPoint(label, features)) =>
            Iterator(
              Instance(label, weight = 0.3, features),
              Instance(label, weight = 0.1, features),
              Instance(label, weight = 0.6, features)
            )
        }

        val noiseData = LinearDataGenerator.generateLinearInput(
          2, Array(1, 3), Array(0.9, -1.3), Array(0.7, 1.2), 500, 1, 0.1)
        val weightedNoiseData = noiseData.map {
          case LabeledPoint(label, features) => Instance(label, weight = 0, features)
        }
        val data2 = weightedSignedData ++ weightedNoiseData

        (sqlContext.createDataFrame(sc.parallelize(data1, 4)),
          sqlContext.createDataFrame(sc.parallelize(data2, 4)))
      }

      val trainer1a = (new LinearRegression).setFitIntercept(true)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)
      val trainer1b = (new LinearRegression).setFitIntercept(true).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)

      // Normal optimizer is not supported with non-zero elasticnet parameter.
      val model1a0 = trainer1a.fit(data)
      val model1a1 = trainer1a.fit(weightedData)
      val model1b = trainer1b.fit(weightedData)

      assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
      assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
      assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
      assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)

      val trainer2a = (new LinearRegression).setFitIntercept(true)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val trainer2b = (new LinearRegression).setFitIntercept(true).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val model2a0 = trainer2a.fit(data)
      val model2a1 = trainer2a.fit(weightedData)
      val model2b = trainer2b.fit(weightedData)
      assert(model2a0.coefficients !~= model2a1.coefficients absTol 1E-3)
      assert(model2a0.intercept !~= model2a1.intercept absTol 1E-3)
      assert(model2a0.coefficients ~== model2b.coefficients absTol 1E-3)
      assert(model2a0.intercept ~== model2b.intercept absTol 1E-3)

      val trainer3a = (new LinearRegression).setFitIntercept(false)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)
      val trainer3b = (new LinearRegression).setFitIntercept(false).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)
      val model3a0 = trainer3a.fit(data)
      val model3a1 = trainer3a.fit(weightedData)
      val model3b = trainer3b.fit(weightedData)
      assert(model3a0.coefficients !~= model3a1.coefficients absTol 1E-3)
      assert(model3a0.coefficients ~== model3b.coefficients absTol 1E-3)

      val trainer4a = (new LinearRegression).setFitIntercept(false)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val trainer4b = (new LinearRegression).setFitIntercept(false).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val model4a0 = trainer4a.fit(data)
      val model4a1 = trainer4a.fit(weightedData)
      val model4b = trainer4b.fit(weightedData)
      assert(model4a0.coefficients !~= model4a1.coefficients absTol 1E-3)
      assert(model4a0.coefficients ~== model4b.coefficients absTol 1E-3)
    }
  }

  test("linear regression model with l-bfgs with big feature datasets") {
    val trainer = new LinearRegression().setSolver("auto")
    val model = trainer.fit(datasetWithSparseFeature)

    // Training results for the model should be available
    assert(model.hasSummary)
    // When LBFGS is used as optimizer, objective history can be restored.
    assert(
      model.summary
        .objectiveHistory
        .sliding(2)
        .forall(x => x(0) >= x(1)))
  }

  test("linear regression summary with weighted samples and intercept by normal solver") {
    /*
       R code:

       model <- glm(formula = "b ~ .", data = df, weights = w)
       summary(model)

       Call:
       glm(formula = "b ~ .", data = df, weights = w)

       Deviance Residuals:
            1       2       3       4
        1.920  -1.358  -1.109   0.960

       Coefficients:
                   Estimate Std. Error t value Pr(>|t|)
       (Intercept)   18.080      9.608   1.882    0.311
       V1             6.080      5.556   1.094    0.471
       V2            -0.600      1.960  -0.306    0.811

       (Dispersion parameter for gaussian family taken to be 7.68)

           Null deviance: 202.00  on 3  degrees of freedom
       Residual deviance:   7.68  on 1  degrees of freedom
       AIC: 18.783

       Number of Fisher Scoring iterations: 2
     */

    val model = new LinearRegression()
      .setWeightCol("weight")
      .setSolver("normal")
      .fit(datasetWithWeight)
    val coefficientsR = Vectors.dense(Array(6.080, -0.600))
    val interceptR = 18.080
    val devianceResidualsR = Array(-1.358, 1.920)
    val seCoefR = Array(5.556, 1.960, 9.608)
    val tValsR = Array(1.094, -0.306, 1.882)
    val pValsR = Array(0.471, 0.811, 0.311)

    assert(model.coefficients ~== coefficientsR absTol 1E-3)
    assert(model.intercept ~== interceptR absTol 1E-3)
    model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
  }

  test("linear regression summary with weighted samples and w/o intercept by normal solver") {
    /*
       R code:

       model <- glm(formula = "b ~ . -1", data = df, weights = w)
       summary(model)

       Call:
       glm(formula = "b ~ . -1", data = df, weights = w)

       Deviance Residuals:
            1       2       3       4
        1.950   2.344  -4.600   2.103

       Coefficients:
          Estimate Std. Error t value Pr(>|t|)
       V1  -3.7271     2.9032  -1.284   0.3279
       V2   3.0100     0.6022   4.998   0.0378 *
       ---
       Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

       (Dispersion parameter for gaussian family taken to be 17.4376)

           Null deviance: 5962.000  on 4  degrees of freedom
       Residual deviance:   34.875  on 2  degrees of freedom
       AIC: 22.835

       Number of Fisher Scoring iterations: 2
     */

    val model = new LinearRegression()
      .setWeightCol("weight")
      .setSolver("normal")
      .setFitIntercept(false)
      .fit(datasetWithWeight)
    val coefficientsR = Vectors.dense(Array(-3.7271, 3.0100))
    val interceptR = 0.0
    val devianceResidualsR = Array(-4.600, 2.344)
    val seCoefR = Array(2.9032, 0.6022)
    val tValsR = Array(-1.284, 4.998)
    val pValsR = Array(0.3279, 0.0378)

    assert(model.coefficients ~== coefficientsR absTol 1E-3)
    assert(model.intercept === interceptR)
    model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
  }

  test("read/write") {
    def checkModelData(model: LinearRegressionModel, model2: LinearRegressionModel): Unit = {
      assert(model.intercept === model2.intercept)
      assert(model.coefficients === model2.coefficients)
    }
    val lr = new LinearRegression()
    testEstimatorAndModelReadWrite(lr, datasetWithWeight, LinearRegressionSuite.allParamSettings,
      checkModelData)
  }
}

object LinearRegressionSuite {

  /**
   * Mapping from all Params to valid settings which differ from the defaults.
   * This is useful for tests which need to exercise all Params, such as save/load.
   * This excludes input columns to simplify some tests.
   */
  val allParamSettings: Map[String, Any] = Map(
    "predictionCol" -> "myPrediction",
    "regParam" -> 0.01,
    "elasticNetParam" -> 0.1,
    "maxIter" -> 2,  // intentionally small
    "fitIntercept" -> true,
    "tol" -> 0.8,
    "standardization" -> false,
    "solver" -> "l-bfgs"
  )
}