about | summary | refs | log | tree | commit | diff
path: root/mllib/src/main/scala
diff options
context:
space:
mode:
author: Sean Owen <sowen@cloudera.com> 2016-05-26 14:25:28 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2016-05-26 14:25:28 -0700
commit: b0a03feef2cf4daa7642ec7f4dc479dbd473b581 (patch)
tree: 0d974e9a1badbe412a02c7bae91da2f3e98c2666 /mllib/src/main/scala
parent: 0f61d6efb45b9ee94fa663f67c4489fbdae2eded (diff)
download: spark-b0a03feef2cf4daa7642ec7f4dc479dbd473b581.tar.gz
spark-b0a03feef2cf4daa7642ec7f4dc479dbd473b581.tar.bz2
spark-b0a03feef2cf4daa7642ec7f4dc479dbd473b581.zip
[SPARK-15457][MLLIB][ML] Eliminate some warnings from MLlib about deprecations
## What changes were proposed in this pull request?

Several classes and methods have been deprecated and are creating lots of build warnings in branch-2.0. This issue is to identify and fix those items:

* WithSGD classes: Change to make class not deprecated, object deprecated, and public class constructor deprecated. Any public use will require a deprecated API. We need to keep a non-deprecated private API since we cannot eliminate certain uses: Python API, streaming algs, and examples.
* Use in PythonMLlibAPI: Change to using private constructors
* Streaming algs: No warnings after we un-deprecate the classes
* Examples: Deprecate or change ones which use deprecated APIs
* MulticlassMetrics fields (precision, etc.)
* LinearRegressionSummary.model field

## How was this patch tested?

Existing tests. Checked for warnings manually.

Author: Sean Owen <sowen@cloudera.com>
Author: Joseph K. Bradley <joseph@databricks.com>

Closes #13314 from jkbradley/warning-cleanups.
Diffstat (limited to 'mllib/src/main/scala')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala | 53
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 8
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala | 6
7 files changed, 44 insertions, 39 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
index 390e9b6444..0b84e0a3fa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
@@ -82,8 +82,8 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid
val metrics = new MulticlassMetrics(predictionAndLabels)
val metric = $(metricName) match {
case "f1" => metrics.weightedFMeasure
- case "precision" => metrics.precision
- case "recall" => metrics.recall
+ case "precision" => metrics.accuracy
+ case "recall" => metrics.accuracy
case "weightedPrecision" => metrics.weightedPrecision
case "weightedRecall" => metrics.weightedRecall
case "accuracy" => metrics.accuracy
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index ff1038cbf1..37552194c5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -558,16 +558,18 @@ class LinearRegressionSummary private[regression] (
val predictionCol: String,
val labelCol: String,
val featuresCol: String,
- @deprecated("The model field is deprecated and will be removed in 2.1.0.", "2.0.0")
- val model: LinearRegressionModel,
+ private val privateModel: LinearRegressionModel,
private val diagInvAtWA: Array[Double]) extends Serializable {
+ @deprecated("The model field is deprecated and will be removed in 2.1.0.", "2.0.0")
+ val model: LinearRegressionModel = privateModel
+
@transient private val metrics = new RegressionMetrics(
predictions
.select(col(predictionCol), col(labelCol).cast(DoubleType))
.rdd
.map { case Row(pred: Double, label: Double) => (pred, label) },
- !model.getFitIntercept)
+ !privateModel.getFitIntercept)
/**
* Returns the explained variance regression score.
@@ -631,10 +633,10 @@ class LinearRegressionSummary private[regression] (
lazy val numInstances: Long = predictions.count()
/** Degrees of freedom */
- private val degreesOfFreedom: Long = if (model.getFitIntercept) {
- numInstances - model.coefficients.size - 1
+ private val degreesOfFreedom: Long = if (privateModel.getFitIntercept) {
+ numInstances - privateModel.coefficients.size - 1
} else {
- numInstances - model.coefficients.size
+ numInstances - privateModel.coefficients.size
}
/**
@@ -642,13 +644,15 @@ class LinearRegressionSummary private[regression] (
* the square root of the instance weights.
*/
lazy val devianceResiduals: Array[Double] = {
- val weighted = if (!model.isDefined(model.weightCol) || model.getWeightCol.isEmpty) {
- lit(1.0)
- } else {
- sqrt(col(model.getWeightCol))
- }
- val dr = predictions.select(col(model.getLabelCol).minus(col(model.getPredictionCol))
- .multiply(weighted).as("weightedResiduals"))
+ val weighted =
+ if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) {
+ lit(1.0)
+ } else {
+ sqrt(col(privateModel.getWeightCol))
+ }
+ val dr = predictions
+ .select(col(privateModel.getLabelCol).minus(col(privateModel.getPredictionCol))
+ .multiply(weighted).as("weightedResiduals"))
.select(min(col("weightedResiduals")).as("min"), max(col("weightedResiduals")).as("max"))
.first()
Array(dr.getDouble(0), dr.getDouble(1))
@@ -668,14 +672,15 @@ class LinearRegressionSummary private[regression] (
throw new UnsupportedOperationException(
"No Std. Error of coefficients available for this LinearRegressionModel")
} else {
- val rss = if (!model.isDefined(model.weightCol) || model.getWeightCol.isEmpty) {
- meanSquaredError * numInstances
- } else {
- val t = udf { (pred: Double, label: Double, weight: Double) =>
- math.pow(label - pred, 2.0) * weight }
- predictions.select(t(col(model.getPredictionCol), col(model.getLabelCol),
- col(model.getWeightCol)).as("wse")).agg(sum(col("wse"))).first().getDouble(0)
- }
+ val rss =
+ if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) {
+ meanSquaredError * numInstances
+ } else {
+ val t = udf { (pred: Double, label: Double, weight: Double) =>
+ math.pow(label - pred, 2.0) * weight }
+ predictions.select(t(col(privateModel.getPredictionCol), col(privateModel.getLabelCol),
+ col(privateModel.getWeightCol)).as("wse")).agg(sum(col("wse"))).first().getDouble(0)
+ }
val sigma2 = rss / degreesOfFreedom
diagInvAtWA.map(_ * sigma2).map(math.sqrt)
}
@@ -695,10 +700,10 @@ class LinearRegressionSummary private[regression] (
throw new UnsupportedOperationException(
"No t-statistic available for this LinearRegressionModel")
} else {
- val estimate = if (model.getFitIntercept) {
- Array.concat(model.coefficients.toArray, Array(model.intercept))
+ val estimate = if (privateModel.getFitIntercept) {
+ Array.concat(privateModel.coefficients.toArray, Array(privateModel.intercept))
} else {
- model.coefficients.toArray
+ privateModel.coefficients.toArray
}
estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 }
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 90d3827531..667290ece3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -152,7 +152,7 @@ private[python] class PythonMLLibAPI extends Serializable {
intercept: Boolean,
validateData: Boolean,
convergenceTol: Double): JList[Object] = {
- val lrAlg = new LinearRegressionWithSGD()
+ val lrAlg = new LinearRegressionWithSGD(1.0, 100, 0.0, 1.0)
lrAlg.setIntercept(intercept)
.setValidateData(validateData)
lrAlg.optimizer
@@ -181,7 +181,7 @@ private[python] class PythonMLLibAPI extends Serializable {
intercept: Boolean,
validateData: Boolean,
convergenceTol: Double): JList[Object] = {
- val lassoAlg = new LassoWithSGD()
+ val lassoAlg = new LassoWithSGD(1.0, 100, 0.01, 1.0)
lassoAlg.setIntercept(intercept)
.setValidateData(validateData)
lassoAlg.optimizer
@@ -209,7 +209,7 @@ private[python] class PythonMLLibAPI extends Serializable {
intercept: Boolean,
validateData: Boolean,
convergenceTol: Double): JList[Object] = {
- val ridgeAlg = new RidgeRegressionWithSGD()
+ val ridgeAlg = new RidgeRegressionWithSGD(1.0, 100, 0.01, 1.0)
ridgeAlg.setIntercept(intercept)
.setValidateData(validateData)
ridgeAlg.optimizer
@@ -268,7 +268,7 @@ private[python] class PythonMLLibAPI extends Serializable {
intercept: Boolean,
validateData: Boolean,
convergenceTol: Double): JList[Object] = {
- val LogRegAlg = new LogisticRegressionWithSGD()
+ val LogRegAlg = new LogisticRegressionWithSGD(1.0, 100, 0.01, 1.0)
LogRegAlg.setIntercept(intercept)
.setValidateData(validateData)
LogRegAlg.optimizer
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index f3c52f61a3..adbcdd302a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -200,13 +200,12 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
/**
* Train a classification model for Binary Logistic Regression
* using Stochastic Gradient Descent. By default L2 regularization is used,
- * which can be changed via [[LogisticRegressionWithSGD.optimizer]].
+ * which can be changed via `LogisticRegressionWithSGD.optimizer`.
* NOTE: Labels used in Logistic Regression should be {0, 1, ..., k - 1}
* for k classes multi-label classification problem.
* Using [[LogisticRegressionWithLBFGS]] is recommended over this.
*/
@Since("0.8.0")
-@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
class LogisticRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -229,6 +228,7 @@ class LogisticRegressionWithSGD private[mllib] (
* numIterations: 100, regParm: 0.01, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
+ @deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
def this() = this(1.0, 100, 0.01, 1.0)
override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index ef8c80f0cb..e14bddf97d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -85,9 +85,7 @@ object LassoModel extends Loader[LassoModel] {
* See also the documentation for the precise formulation.
*/
@Since("0.8.0")
-@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
- "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
-class LassoWithSGD private (
+class LassoWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
private var regParam: Double,
@@ -108,6 +106,8 @@ class LassoWithSGD private (
* regParam: 0.01, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
+ @deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
+ "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
def this() = this(1.0, 100, 0.01, 1.0)
override protected def createModel(weights: Vector, intercept: Double) = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 9e9d98bc5e..2ceac4b8cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -86,7 +86,6 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
* See also the documentation for the precise formulation.
*/
@Since("0.8.0")
-@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
class LinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -108,6 +107,7 @@ class LinearRegressionWithSGD private[mllib] (
* numIterations: 100, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
+ @deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
def this() = this(1.0, 100, 0.0, 1.0)
override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 512fb9a712..301f02fd98 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -86,9 +86,7 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
* See also the documentation for the precise formulation.
*/
@Since("0.8.0")
-@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
- "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
-class RidgeRegressionWithSGD private (
+class RidgeRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
private var regParam: Double,
@@ -109,6 +107,8 @@ class RidgeRegressionWithSGD private (
* regParam: 0.01, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
+ @deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
+ "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
def this() = this(1.0, 100, 0.01, 1.0)
override protected def createModel(weights: Vector, intercept: Double) = {