From cabd54d93162a3f2a0cc7ed76fb46d8224edab94 Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Thu, 28 Apr 2016 22:44:14 -0700 Subject: [SPARK-14829][MLLIB] Deprecate GLM APIs using SGD ## What changes were proposed in this pull request? According to [SPARK-14829](https://issues.apache.org/jira/browse/SPARK-14829), deprecate the SGD-based GLM APIs in both Scala and Python: LogisticRegressionWithSGD, LinearRegressionWithSGD, LassoWithSGD and RidgeRegressionWithSGD. ## How was this patch tested? Manual tests. Author: Zheng RuiFeng Closes #12596 from zhengruifeng/deprecate_sgd. --- .../mllib/classification/LogisticRegression.scala | 2 ++ .../org/apache/spark/mllib/regression/Lasso.scala | 4 ++++ .../spark/mllib/regression/LinearRegression.scala | 2 ++ .../spark/mllib/regression/RidgeRegression.scala | 4 ++++ python/pyspark/mllib/classification.py | 7 +++++++ python/pyspark/mllib/regression.py | 18 ++++++++++++++++++ 6 files changed, 37 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index f10570e662..1d25a58e0f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -206,6 +206,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] { * Using [[LogisticRegressionWithLBFGS]] is recommended over this. 
*/ @Since("0.8.0") +@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0") class LogisticRegressionWithSGD private[mllib] ( private var stepSize: Double, private var numIterations: Int, @@ -240,6 +241,7 @@ class LogisticRegressionWithSGD private[mllib] ( * NOTE: Labels used in Logistic Regression should be {0, 1} */ @Since("0.8.0") +@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0") object LogisticRegressionWithSGD { // NOTE(shivaram): We use multiple train methods instead of default arguments to support // Java programs. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index d55e5dfdaa..ef8c80f0cb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -85,6 +85,8 @@ object LassoModel extends Loader[LassoModel] { * See also the documentation for the precise formulation. */ @Since("0.8.0") +@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " + + "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0") class LassoWithSGD private ( private var stepSize: Double, private var numIterations: Int, @@ -118,6 +120,8 @@ class LassoWithSGD private ( * */ @Since("0.8.0") +@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. 
Note the default " + + "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0") object LassoWithSGD { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index e754e74492..9e9d98bc5e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -86,6 +86,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] { * See also the documentation for the precise formulation. */ @Since("0.8.0") +@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") class LinearRegressionWithSGD private[mllib] ( private var stepSize: Double, private var numIterations: Int, @@ -119,6 +120,7 @@ class LinearRegressionWithSGD private[mllib] ( * */ @Since("0.8.0") +@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") object LinearRegressionWithSGD { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 0a44ff559d..512fb9a712 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -86,6 +86,8 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] { * See also the documentation for the precise formulation. */ @Since("0.8.0") +@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. 
Note the default " + + "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0") class RidgeRegressionWithSGD private ( private var stepSize: Double, private var numIterations: Int, @@ -119,6 +121,8 @@ class RidgeRegressionWithSGD private ( * */ @Since("0.8.0") +@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " + + "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0") object RidgeRegressionWithSGD { /** diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 57106f8690..fe5b6844bf 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -16,6 +16,7 @@ # from math import exp +import warnings import numpy from numpy import array @@ -266,6 +267,8 @@ class LogisticRegressionModel(LinearClassificationModel): class LogisticRegressionWithSGD(object): """ .. versionadded:: 0.9.0 + .. note:: Deprecated in 2.0.0. Use ml.classification.LogisticRegression or + LogisticRegressionWithLBFGS. """ @classmethod @since('0.9.0') @@ -312,6 +315,10 @@ class LogisticRegressionWithSGD(object): A condition which decides iteration termination. (default: 0.001) """ + warnings.warn( + "Deprecated in 2.0.0. 
Use ml.classification.LogisticRegression or " + "LogisticRegressionWithLBFGS.") + def train(rdd, i): return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations), float(step), float(miniBatchFraction), i, float(regParam), regType, diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 3b77a62000..639c5eabaa 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -17,6 +17,7 @@ import numpy as np from numpy import array +import warnings from pyspark import RDD, since from pyspark.streaming.dstream import DStream @@ -221,6 +222,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): class LinearRegressionWithSGD(object): """ .. versionadded:: 0.9.0 + .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression. """ @classmethod @since("0.9.0") @@ -276,6 +278,8 @@ class LinearRegressionWithSGD(object): A condition which decides iteration termination. (default: 0.001) """ + warnings.warn("Deprecated in 2.0.0. Use ml.regression.LinearRegression.") + def train(rdd, i): return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations), float(step), float(miniBatchFraction), i, float(regParam), @@ -366,6 +370,8 @@ class LassoModel(LinearRegressionModelBase): class LassoWithSGD(object): """ .. versionadded:: 0.9.0 + .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. + Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression. """ @classmethod @since("0.9.0") @@ -413,6 +419,10 @@ class LassoWithSGD(object): A condition which decides iteration termination. (default: 0.001) """ + warnings.warn( + "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. 
" + "Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.") + def train(rdd, i): return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step), float(regParam), float(miniBatchFraction), i, bool(intercept), @@ -503,6 +513,9 @@ class RidgeRegressionModel(LinearRegressionModelBase): class RidgeRegressionWithSGD(object): """ .. versionadded:: 0.9.0 + .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. + Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for + LinearRegression. """ @classmethod @since("0.9.0") @@ -550,6 +563,11 @@ class RidgeRegressionWithSGD(object): A condition which decides iteration termination. (default: 0.001) """ + warnings.warn( + "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. " + "Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for " + "LinearRegression.") + def train(rdd, i): return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step), float(regParam), float(miniBatchFraction), i, bool(intercept), -- cgit v1.2.3