diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-03-04 08:25:41 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-03-04 08:25:41 -0800 |
commit | c8f25459ed4ad6b51a5f11665364cfe0b84f7b3c (patch) | |
tree | b055ffe282e2224a298ef7acb12bf4369d5e2475 /python/pyspark/ml | |
parent | e617508244b508b59b4debb35cad3258cddbb9cf (diff) | |
download | spark-c8f25459ed4ad6b51a5f11665364cfe0b84f7b3c.tar.gz spark-c8f25459ed4ad6b51a5f11665364cfe0b84f7b3c.tar.bz2 spark-c8f25459ed4ad6b51a5f11665364cfe0b84f7b3c.zip |
[SPARK-13676] Fix mismatched default values for regParam in LogisticRegression
## What changes were proposed in this pull request?
The default value of regularization parameter for `LogisticRegression` algorithm is different in Scala and Python. We should provide the same value.
**Scala**
```
scala> new org.apache.spark.ml.classification.LogisticRegression().getRegParam
res0: Double = 0.0
```
**Python**
```
>>> from pyspark.ml.classification import LogisticRegression
>>> LogisticRegression().getRegParam()
0.1
```
## How was this patch tested?
manual. Check the following in `pyspark`.
```
>>> from pyspark.ml.classification import LogisticRegression
>>> LogisticRegression().getRegParam()
0.0
```
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #11519 from dongjoon-hyun/SPARK-13676.
Diffstat (limited to 'python/pyspark/ml')
-rw-r--r-- | python/pyspark/ml/classification.py | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 253af15cb5..29d1d203f2 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -79,12 +79,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, threshold=0.5, thresholds=None, probabilityCol="probability", rawPredictionCol="rawPrediction", standardization=True, weightCol=None): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ threshold=0.5, thresholds=None, probabilityCol="probability", \ rawPredictionCol="rawPrediction", standardization=True, weightCol=None) If the threshold and thresholds Params are both set, they must be equivalent. @@ -92,7 +92,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti super(LogisticRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LogisticRegression", self.uid) - self._setDefault(maxIter=100, regParam=0.1, tol=1E-6, threshold=0.5) + self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) self._checkThresholdConsistency() @@ -100,12 +100,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti @keyword_only @since("1.3.0") def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, threshold=0.5, thresholds=None, probabilityCol="probability", rawPredictionCol="rawPrediction", standardization=True, weightCol=None): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ threshold=0.5, thresholds=None, probabilityCol="probability", \ rawPredictionCol="rawPrediction", standardization=True, weightCol=None) Sets params for logistic regression. |