aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Zheng RuiFeng <ruifengz@foxmail.com> 2016-04-28 22:44:14 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2016-04-28 22:44:14 -0700
commit: cabd54d93162a3f2a0cc7ed76fb46d8224edab94 (patch)
tree: ea9fd0548780ced8b3db432a068d8c7c809a4568
parent: 769a909d1357766a441ff69e6e98c22c51b12c93 (diff)
downloadspark-cabd54d93162a3f2a0cc7ed76fb46d8224edab94.tar.gz
spark-cabd54d93162a3f2a0cc7ed76fb46d8224edab94.tar.bz2
spark-cabd54d93162a3f2a0cc7ed76fb46d8224edab94.zip
[SPARK-14829][MLLIB] Deprecate GLM APIs using SGD
## What changes were proposed in this pull request?

According to [SPARK-14829](https://issues.apache.org/jira/browse/SPARK-14829), deprecate the APIs of LogisticRegression and LinearRegression that use SGD.

## How was this patch tested?

Manual tests.

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #12596 from zhengruifeng/deprecate_sgd.
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala | 4
-rw-r--r-- python/pyspark/mllib/classification.py | 7
-rw-r--r-- python/pyspark/mllib/regression.py | 18
6 files changed, 37 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index f10570e662..1d25a58e0f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -206,6 +206,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
* Using [[LogisticRegressionWithLBFGS]] is recommended over this.
*/
@Since("0.8.0")
+@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
class LogisticRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -240,6 +241,7 @@ class LogisticRegressionWithSGD private[mllib] (
* NOTE: Labels used in Logistic Regression should be {0, 1}
*/
@Since("0.8.0")
+@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
object LogisticRegressionWithSGD {
// NOTE(shivaram): We use multiple train methods instead of default arguments to support
// Java programs.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index d55e5dfdaa..ef8c80f0cb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -85,6 +85,8 @@ object LassoModel extends Loader[LassoModel] {
* See also the documentation for the precise formulation.
*/
@Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
+ "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
class LassoWithSGD private (
private var stepSize: Double,
private var numIterations: Int,
@@ -118,6 +120,8 @@ class LassoWithSGD private (
*
*/
@Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
+ "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
object LassoWithSGD {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index e754e74492..9e9d98bc5e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -86,6 +86,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
* See also the documentation for the precise formulation.
*/
@Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
class LinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -119,6 +120,7 @@ class LinearRegressionWithSGD private[mllib] (
*
*/
@Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
object LinearRegressionWithSGD {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 0a44ff559d..512fb9a712 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -86,6 +86,8 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
* See also the documentation for the precise formulation.
*/
@Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
+ "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
class RidgeRegressionWithSGD private (
private var stepSize: Double,
private var numIterations: Int,
@@ -119,6 +121,8 @@ class RidgeRegressionWithSGD private (
*
*/
@Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
+ "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
object RidgeRegressionWithSGD {
/**
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 57106f8690..fe5b6844bf 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -16,6 +16,7 @@
#
from math import exp
+import warnings
import numpy
from numpy import array
@@ -266,6 +267,8 @@ class LogisticRegressionModel(LinearClassificationModel):
class LogisticRegressionWithSGD(object):
"""
.. versionadded:: 0.9.0
+ .. note:: Deprecated in 2.0.0. Use ml.classification.LogisticRegression or
+ LogisticRegressionWithLBFGS.
"""
@classmethod
@since('0.9.0')
@@ -312,6 +315,10 @@ class LogisticRegressionWithSGD(object):
A condition which decides iteration termination.
(default: 0.001)
"""
+ warnings.warn(
+ "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
+ "LogisticRegressionWithLBFGS.")
+
def train(rdd, i):
return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
float(step), float(miniBatchFraction), i, float(regParam), regType,
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 3b77a62000..639c5eabaa 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -17,6 +17,7 @@
import numpy as np
from numpy import array
+import warnings
from pyspark import RDD, since
from pyspark.streaming.dstream import DStream
@@ -221,6 +222,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
class LinearRegressionWithSGD(object):
"""
.. versionadded:: 0.9.0
+ .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression.
"""
@classmethod
@since("0.9.0")
@@ -276,6 +278,8 @@ class LinearRegressionWithSGD(object):
A condition which decides iteration termination.
(default: 0.001)
"""
+ warnings.warn("Deprecated in 2.0.0. Use ml.regression.LinearRegression.")
+
def train(rdd, i):
return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
float(step), float(miniBatchFraction), i, float(regParam),
@@ -366,6 +370,8 @@ class LassoModel(LinearRegressionModelBase):
class LassoWithSGD(object):
"""
.. versionadded:: 0.9.0
+ .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0.
+ Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.
"""
@classmethod
@since("0.9.0")
@@ -413,6 +419,10 @@ class LassoWithSGD(object):
A condition which decides iteration termination.
(default: 0.001)
"""
+ warnings.warn(
+ "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. "
+ "Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.")
+
def train(rdd, i):
return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
float(regParam), float(miniBatchFraction), i, bool(intercept),
@@ -503,6 +513,9 @@ class RidgeRegressionModel(LinearRegressionModelBase):
class RidgeRegressionWithSGD(object):
"""
.. versionadded:: 0.9.0
+ .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0.
+ Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for
+ LinearRegression.
"""
@classmethod
@since("0.9.0")
@@ -550,6 +563,11 @@ class RidgeRegressionWithSGD(object):
A condition which decides iteration termination.
(default: 0.001)
"""
+ warnings.warn(
+ "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. "
+ "Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for "
+ "LinearRegression.")
+
def train(rdd, i):
return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
float(regParam), float(miniBatchFraction), i, bool(intercept),