about summary refs log tree commit diff
path: root/python/pyspark/mllib/classification.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/mllib/classification.py')
-rw-r--r-- python/pyspark/mllib/classification.py 37
1 files changed, 25 insertions, 12 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index aab4015ba8..9e6f17ef6e 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -652,21 +652,34 @@ class NaiveBayes(object):
@inherit_doc
class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm):
"""
- Run LogisticRegression with SGD on a batch of data.
-
- The weights obtained at the end of training a stream are used as initial
- weights for the next batch.
-
- :param stepSize: Step size for each iteration of gradient descent.
- :param numIterations: Number of iterations run for each batch of data.
- :param miniBatchFraction: Fraction of data on which SGD is run for each
- iteration.
- :param regParam: L2 Regularization parameter.
- :param convergenceTol: A condition which decides iteration termination.
+ Train or predict a logistic regression model on streaming data. Training uses
+ Stochastic Gradient Descent to update the model based on each new batch of
+ incoming data from a DStream.
+
+ Each batch of data is assumed to be an RDD of LabeledPoints.
+ The number of data points per batch can vary, but the number
+ of features must be constant. An initial weight
+ vector must be provided.
+
+ :param stepSize:
+ Step size for each iteration of gradient descent.
+ (default: 0.1)
+ :param numIterations:
+ Number of iterations run for each batch of data.
+ (default: 50)
+ :param miniBatchFraction:
+ Fraction of each batch of data to use for updates.
+ (default: 1.0)
+ :param regParam:
+ L2 Regularization parameter.
+ (default: 0.0)
+ :param convergenceTol:
+ Value used to determine when to terminate iterations.
+ (default: 0.001)
.. versionadded:: 1.5.0
"""
- def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, regParam=0.01,
+ def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, regParam=0.0,
convergenceTol=0.001):
self.stepSize = stepSize
self.numIterations = numIterations