aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--python/pyspark/ml/classification.py75
-rw-r--r--python/pyspark/ml/param/_shared_params_code_gen.py11
-rw-r--r--python/pyspark/ml/param/shared.py111
-rw-r--r--python/pyspark/ml/regression.py42
4 files changed, 143 insertions, 96 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 83f808efc3..22bdd1b322 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -31,7 +31,8 @@ __all__ = ['LogisticRegression', 'LogisticRegressionModel', 'DecisionTreeClassif
@inherit_doc
class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
- HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol):
+ HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol,
+ HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds):
"""
Logistic regression.
Currently, this class only supports binary classification.
@@ -65,17 +66,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
"""
# a placeholder to make it appear in the generated doc
- elasticNetParam = \
- Param(Params._dummy(), "elasticNetParam",
- "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
- "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
- fitIntercept = Param(Params._dummy(), "fitIntercept", "whether to fit an intercept term.")
- thresholds = Param(Params._dummy(), "thresholds",
- "Thresholds in multi-class classification" +
- " to adjust the probability of predicting each class." +
- " Array must have length equal to the number of classes, with values >= 0." +
- " The class with largest value p/t is predicted, where p is the original" +
- " probability of that class and t is the class' threshold.")
threshold = Param(Params._dummy(), "threshold",
"Threshold in binary classification prediction, in range [0, 1]." +
" If threshold and thresholds are both set, they must match.")
@@ -83,40 +73,23 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
- threshold=0.5, thresholds=None,
- probabilityCol="probability", rawPredictionCol="rawPrediction"):
+ threshold=0.5, thresholds=None, probabilityCol="probability",
+ rawPredictionCol="rawPrediction", standardization=True):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
- threshold=0.5, thresholds=None, \
- probabilityCol="probability", rawPredictionCol="rawPrediction")
+ threshold=0.5, thresholds=None, probabilityCol="probability", \
+ rawPredictionCol="rawPrediction", standardization=True)
If the threshold and thresholds Params are both set, they must be equivalent.
"""
super(LogisticRegression, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.LogisticRegression", self.uid)
- #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty
- # is an L2 penalty. For alpha = 1, it is an L1 penalty.
- self.elasticNetParam = \
- Param(self, "elasticNetParam",
- "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
- "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
- #: param for whether to fit an intercept term.
- self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.")
#: param for threshold in binary classification, in range [0, 1].
self.threshold = Param(self, "threshold",
"Threshold in binary classification prediction, in range [0, 1]." +
" If threshold and thresholds are both set, they must match.")
- #: param for thresholds or cutoffs in binary or multiclass classification
- self.thresholds = \
- Param(self, "thresholds",
- "Thresholds in multi-class classification" +
- " to adjust the probability of predicting each class." +
- " Array must have length equal to the number of classes, with values >= 0." +
- " The class with largest value p/t is predicted, where p is the original" +
- " probability of that class and t is the class' threshold.")
- self._setDefault(maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1E-6,
- fitIntercept=True, threshold=0.5)
+ self._setDefault(maxIter=100, regParam=0.1, tol=1E-6, threshold=0.5)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
self._checkThresholdConsistency()
@@ -124,13 +97,13 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
@keyword_only
def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
- threshold=0.5, thresholds=None,
- probabilityCol="probability", rawPredictionCol="rawPrediction"):
+ threshold=0.5, thresholds=None, probabilityCol="probability",
+ rawPredictionCol="rawPrediction", standardization=True):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
- threshold=0.5, thresholds=None, \
- probabilityCol="probability", rawPredictionCol="rawPrediction")
+ threshold=0.5, thresholds=None, probabilityCol="probability", \
+ rawPredictionCol="rawPrediction", standardization=True)
Sets params for logistic regression.
If the threshold and thresholds Params are both set, they must be equivalent.
"""
@@ -142,32 +115,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
def _create_model(self, java_model):
return LogisticRegressionModel(java_model)
- def setElasticNetParam(self, value):
- """
- Sets the value of :py:attr:`elasticNetParam`.
- """
- self._paramMap[self.elasticNetParam] = value
- return self
-
- def getElasticNetParam(self):
- """
- Gets the value of elasticNetParam or its default value.
- """
- return self.getOrDefault(self.elasticNetParam)
-
- def setFitIntercept(self, value):
- """
- Sets the value of :py:attr:`fitIntercept`.
- """
- self._paramMap[self.fitIntercept] = value
- return self
-
- def getFitIntercept(self):
- """
- Gets the value of fitIntercept or its default value.
- """
- return self.getOrDefault(self.fitIntercept)
-
def setThreshold(self, value):
"""
Sets the value of :py:attr:`threshold`.
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index 926375e448..5b39e5dd4e 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -124,7 +124,16 @@ if __name__ == "__main__":
("stepSize", "Step size to be used for each iteration of optimization.", None),
("handleInvalid", "how to handle invalid entries. Options are skip (which will filter " +
"out rows with bad values), or error (which will throw an errror). More options may be " +
- "added later.", None)]
+ "added later.", None),
+ ("elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
+ "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.", "0.0"),
+ ("fitIntercept", "whether to fit an intercept term.", "True"),
+ ("standardization", "whether to standardize the training features before fitting the " +
+ "model.", "True"),
+ ("thresholds", "Thresholds in multi-class classification to adjust the probability of " +
+ "predicting each class. Array must have length equal to the number of classes, with " +
+ "values >= 0. The class with largest value p/t is predicted, where p is the original " +
+ "probability of that class and t is the class' threshold.", None)]
code = []
for name, doc, defaultValueStr in shared:
param_code = _gen_param_header(name, doc, defaultValueStr)
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index 682170aee8..af12181286 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -459,6 +459,117 @@ class HasHandleInvalid(Params):
return self.getOrDefault(self.handleInvalid)
+class HasElasticNetParam(Params):
+ """
+ Mixin for param elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.
+ """
+
+ # a placeholder to make it appear in the generated doc
+ elasticNetParam = Param(Params._dummy(), "elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
+
+ def __init__(self):
+ super(HasElasticNetParam, self).__init__()
+ #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.
+ self.elasticNetParam = Param(self, "elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
+ self._setDefault(elasticNetParam=0.0)
+
+ def setElasticNetParam(self, value):
+ """
+ Sets the value of :py:attr:`elasticNetParam`.
+ """
+ self._paramMap[self.elasticNetParam] = value
+ return self
+
+ def getElasticNetParam(self):
+ """
+ Gets the value of elasticNetParam or its default value.
+ """
+ return self.getOrDefault(self.elasticNetParam)
+
+
+class HasFitIntercept(Params):
+ """
+ Mixin for param fitIntercept: whether to fit an intercept term.
+ """
+
+ # a placeholder to make it appear in the generated doc
+ fitIntercept = Param(Params._dummy(), "fitIntercept", "whether to fit an intercept term.")
+
+ def __init__(self):
+ super(HasFitIntercept, self).__init__()
+ #: param for whether to fit an intercept term.
+ self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.")
+ self._setDefault(fitIntercept=True)
+
+ def setFitIntercept(self, value):
+ """
+ Sets the value of :py:attr:`fitIntercept`.
+ """
+ self._paramMap[self.fitIntercept] = value
+ return self
+
+ def getFitIntercept(self):
+ """
+ Gets the value of fitIntercept or its default value.
+ """
+ return self.getOrDefault(self.fitIntercept)
+
+
+class HasStandardization(Params):
+ """
+ Mixin for param standardization: whether to standardize the training features before fitting the model.
+ """
+
+ # a placeholder to make it appear in the generated doc
+ standardization = Param(Params._dummy(), "standardization", "whether to standardize the training features before fitting the model.")
+
+ def __init__(self):
+ super(HasStandardization, self).__init__()
+ #: param for whether to standardize the training features before fitting the model.
+ self.standardization = Param(self, "standardization", "whether to standardize the training features before fitting the model.")
+ self._setDefault(standardization=True)
+
+ def setStandardization(self, value):
+ """
+ Sets the value of :py:attr:`standardization`.
+ """
+ self._paramMap[self.standardization] = value
+ return self
+
+ def getStandardization(self):
+ """
+ Gets the value of standardization or its default value.
+ """
+ return self.getOrDefault(self.standardization)
+
+
+class HasThresholds(Params):
+ """
+ Mixin for param thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold.
+ """
+
+ # a placeholder to make it appear in the generated doc
+ thresholds = Param(Params._dummy(), "thresholds", "Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold.")
+
+ def __init__(self):
+ super(HasThresholds, self).__init__()
+ #: param for Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold.
+ self.thresholds = Param(self, "thresholds", "Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold.")
+
+ def setThresholds(self, value):
+ """
+ Sets the value of :py:attr:`thresholds`.
+ """
+ self._paramMap[self.thresholds] = value
+ return self
+
+ def getThresholds(self):
+ """
+ Gets the value of thresholds or its default value.
+ """
+ return self.getOrDefault(self.thresholds)
+
+
class DecisionTreeParams(Params):
"""
Mixin for Decision Tree parameters.
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 44f60a7695..a9503608b7 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -28,7 +28,8 @@ __all__ = ['DecisionTreeRegressor', 'DecisionTreeRegressionModel', 'GBTRegressor
@inherit_doc
class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
- HasRegParam, HasTol):
+ HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept,
+ HasStandardization):
"""
Linear regression.
@@ -63,38 +64,30 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
TypeError: Method setParams forces keyword arguments.
"""
- # a placeholder to make it appear in the generated doc
- elasticNetParam = \
- Param(Params._dummy(), "elasticNetParam",
- "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
- "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
-
@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6):
+ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+ standardization=True):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6)
+ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
+ standardization=True)
"""
super(LinearRegression, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.regression.LinearRegression", self.uid)
- #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty
- # is an L2 penalty. For alpha = 1, it is an L1 penalty.
- self.elasticNetParam = \
- Param(self, "elasticNetParam",
- "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " +
- "is an L2 penalty. For alpha = 1, it is an L1 penalty.")
- self._setDefault(maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6)
+ self._setDefault(maxIter=100, regParam=0.0, tol=1e-6)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@keyword_only
def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6):
+ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+ standardization=True):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6)
+ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
+ standardization=True)
Sets params for linear regression.
"""
kwargs = self.setParams._input_kwargs
@@ -103,19 +96,6 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
def _create_model(self, java_model):
return LinearRegressionModel(java_model)
- def setElasticNetParam(self, value):
- """
- Sets the value of :py:attr:`elasticNetParam`.
- """
- self._paramMap[self.elasticNetParam] = value
- return self
-
- def getElasticNetParam(self):
- """
- Gets the value of elasticNetParam or its default value.
- """
- return self.getOrDefault(self.elasticNetParam)
-
class LinearRegressionModel(JavaModel):
"""