Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/mllib/classification.py | 36
-rw-r--r--  python/pyspark/mllib/regression.py     | 36
2 files changed, 37 insertions(+), 35 deletions(-)
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 5d90dddb5d..b654813fb4 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -76,7 +76,7 @@ class LogisticRegressionWithSGD(object):
@classmethod
def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
- initialWeights=None, regParam=1.0, regType="none", intercept=False):
+ initialWeights=None, regParam=0.01, regType="l2", intercept=False):
"""
Train a logistic regression model on the given data.
@@ -87,16 +87,16 @@ class LogisticRegressionWithSGD(object):
:param miniBatchFraction: Fraction of data to be used for each SGD
iteration.
:param initialWeights: The initial weights (default: None).
- :param regParam: The regularizer parameter (default: 1.0).
+ :param regParam: The regularizer parameter (default: 0.01).
:param regType: The type of regularizer used for training
our model.
:Allowed values:
- - "l1" for using L1Updater
- - "l2" for using SquaredL2Updater
- - "none" for no regularizer
+ - "l1" for using L1 regularization
+ - "l2" for using L2 regularization
+ - None for no regularization
- (default: "none")
+ (default: "l2")
@param intercept: Boolean parameter which indicates the use
or not of the augmented representation for
@@ -104,8 +104,9 @@ class LogisticRegressionWithSGD(object):
are activated or not).
"""
def train(rdd, i):
- return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, iterations, step,
- miniBatchFraction, i, regParam, regType, intercept)
+ return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
+ float(step), float(miniBatchFraction), i, float(regParam), regType,
+ bool(intercept))
return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
@@ -145,8 +146,8 @@ class SVMModel(LinearModel):
class SVMWithSGD(object):
@classmethod
- def train(cls, data, iterations=100, step=1.0, regParam=1.0,
- miniBatchFraction=1.0, initialWeights=None, regType="none", intercept=False):
+ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
+ miniBatchFraction=1.0, initialWeights=None, regType="l2", intercept=False):
"""
Train a support vector machine on the given data.
@@ -154,7 +155,7 @@ class SVMWithSGD(object):
:param iterations: The number of iterations (default: 100).
:param step: The step parameter used in SGD
(default: 1.0).
- :param regParam: The regularizer parameter (default: 1.0).
+ :param regParam: The regularizer parameter (default: 0.01).
:param miniBatchFraction: Fraction of data to be used for each SGD
iteration.
:param initialWeights: The initial weights (default: None).
@@ -162,11 +163,11 @@ class SVMWithSGD(object):
our model.
:Allowed values:
- - "l1" for using L1Updater
- - "l2" for using SquaredL2Updater,
- - "none" for no regularizer.
+ - "l1" for using L1 regularization
+ - "l2" for using L2 regularization
+ - None for no regularization
- (default: "none")
+ (default: "l2")
@param intercept: Boolean parameter which indicates the use
or not of the augmented representation for
@@ -174,8 +175,9 @@ class SVMWithSGD(object):
are activated or not).
"""
def train(rdd, i):
- return callMLlibFunc("trainSVMModelWithSGD", rdd, iterations, step, regParam,
- miniBatchFraction, i, regType, intercept)
+ return callMLlibFunc("trainSVMModelWithSGD", rdd, int(iterations), float(step),
+ float(regParam), float(miniBatchFraction), i, regType,
+ bool(intercept))
return _regression_train_wrapper(train, SVMModel, data, initialWeights)
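
A minimal usage sketch (not part of this patch), assuming a running PySpark environment: after this change, LogisticRegressionWithSGD and SVMWithSGD train with L2 regularization and regParam=0.01 by default, and passing regType=None restores the old unregularized behavior.

from pyspark import SparkContext
from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext(appName="classification-defaults-sketch")
data = sc.parallelize([
    LabeledPoint(0.0, [0.0, 1.0]),
    LabeledPoint(1.0, [1.0, 0.0]),
])

# Equivalent to train(data, regParam=0.01, regType="l2") under the new defaults.
lr_model = LogisticRegressionWithSGD.train(data, iterations=10)
svm_model = SVMWithSGD.train(data, iterations=10)

# Opt out of regularization explicitly.
lr_unreg = LogisticRegressionWithSGD.train(data, iterations=10, regType=None)

sc.stop()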
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 66e25a48df..f4f5e615fa 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -138,7 +138,7 @@ class LinearRegressionWithSGD(object):
@classmethod
def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
- initialWeights=None, regParam=1.0, regType="none", intercept=False):
+ initialWeights=None, regParam=0.0, regType=None, intercept=False):
"""
Train a linear regression model on the given data.
@@ -149,16 +149,16 @@ class LinearRegressionWithSGD(object):
:param miniBatchFraction: Fraction of data to be used for each SGD
iteration.
:param initialWeights: The initial weights (default: None).
- :param regParam: The regularizer parameter (default: 1.0).
+ :param regParam: The regularizer parameter (default: 0.0).
:param regType: The type of regularizer used for training
our model.
:Allowed values:
- - "l1" for using L1Updater,
- - "l2" for using SquaredL2Updater,
- - "none" for no regularizer.
+ - "l1" for using L1 regularization (lasso),
+ - "l2" for using L2 regularization (ridge),
+ - None for no regularization
- (default: "none")
+ (default: None)
@param intercept: Boolean parameter which indicates the use
or not of the augmented representation for
@@ -166,11 +166,11 @@ class LinearRegressionWithSGD(object):
are activated or not).
"""
def train(rdd, i):
- return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, iterations, step,
- miniBatchFraction, i, regParam, regType, intercept)
+ return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
+ float(step), float(miniBatchFraction), i, float(regParam),
+ regType, bool(intercept))
- return _regression_train_wrapper(train, LinearRegressionModel,
- data, initialWeights)
+ return _regression_train_wrapper(train, LinearRegressionModel, data, initialWeights)
class LassoModel(LinearRegressionModelBase):
@@ -209,12 +209,13 @@ class LassoModel(LinearRegressionModelBase):
class LassoWithSGD(object):
@classmethod
- def train(cls, data, iterations=100, step=1.0, regParam=1.0,
+ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
miniBatchFraction=1.0, initialWeights=None):
"""Train a Lasso regression model on the given data."""
def train(rdd, i):
- return callMLlibFunc("trainLassoModelWithSGD", rdd, iterations, step, regParam,
- miniBatchFraction, i)
+ return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
+ float(regParam), float(miniBatchFraction), i)
+
return _regression_train_wrapper(train, LassoModel, data, initialWeights)
@@ -254,15 +255,14 @@ class RidgeRegressionModel(LinearRegressionModelBase):
class RidgeRegressionWithSGD(object):
@classmethod
- def train(cls, data, iterations=100, step=1.0, regParam=1.0,
+ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
miniBatchFraction=1.0, initialWeights=None):
"""Train a ridge regression model on the given data."""
def train(rdd, i):
- return callMLlibFunc("trainRidgeModelWithSGD", rdd, iterations, step, regParam,
- miniBatchFraction, i)
+ return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
+ float(regParam), float(miniBatchFraction), i)
- return _regression_train_wrapper(train, RidgeRegressionModel,
- data, initialWeights)
+ return _regression_train_wrapper(train, RidgeRegressionModel, data, initialWeights)
def _test():
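
A companion sketch (again, not part of the patch) contrasting the new regression defaults: LinearRegressionWithSGD stays unregularized (regParam=0.0, regType=None), while the Lasso and Ridge wrappers now default to regParam=0.01.

from pyspark import SparkContext
from pyspark.mllib.regression import (LabeledPoint, LinearRegressionWithSGD,
                                      LassoWithSGD, RidgeRegressionWithSGD)

sc = SparkContext(appName="regression-defaults-sketch")
data = sc.parallelize([
    LabeledPoint(3.0, [1.0, 2.0]),
    LabeledPoint(5.0, [2.0, 3.0]),
])

# Plain linear regression: no regularizer unless one is requested.
linear = LinearRegressionWithSGD.train(data, iterations=10)

# Lasso and ridge now use regParam=0.01 by default; the explicit keyword
# below is equivalent to relying on the default.
lasso = LassoWithSGD.train(data, iterations=10)
ridge = RidgeRegressionWithSGD.train(data, iterations=10, regParam=0.01)

sc.stop()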