From c8f25459ed4ad6b51a5f11665364cfe0b84f7b3c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Mar 2016 08:25:41 -0800 Subject: [SPARK-13676] Fix mismatched default values for regParam in LogisticRegression ## What changes were proposed in this pull request? The default value of regularization parameter for `LogisticRegression` algorithm is different in Scala and Python. We should provide the same value. **Scala** ``` scala> new org.apache.spark.ml.classification.LogisticRegression().getRegParam res0: Double = 0.0 ``` **Python** ``` >>> from pyspark.ml.classification import LogisticRegression >>> LogisticRegression().getRegParam() 0.1 ``` ## How was this patch tested? manual. Check the following in `pyspark`. ``` >>> from pyspark.ml.classification import LogisticRegression >>> LogisticRegression().getRegParam() 0.0 ``` Author: Dongjoon Hyun Closes #11519 from dongjoon-hyun/SPARK-13676. --- python/pyspark/ml/classification.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'python/pyspark') diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 253af15cb5..29d1d203f2 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -79,12 +79,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, threshold=0.5, thresholds=None, probabilityCol="probability", rawPredictionCol="rawPrediction", standardization=True, weightCol=None): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ threshold=0.5, thresholds=None, probabilityCol="probability", \ rawPredictionCol="rawPrediction", standardization=True, weightCol=None) If the threshold and thresholds Params are both set, they must be equivalent. @@ -92,7 +92,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti super(LogisticRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LogisticRegression", self.uid) - self._setDefault(maxIter=100, regParam=0.1, tol=1E-6, threshold=0.5) + self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) self._checkThresholdConsistency() @@ -100,12 +100,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti @keyword_only @since("1.3.0") def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, threshold=0.5, thresholds=None, probabilityCol="probability", rawPredictionCol="rawPrediction", standardization=True, weightCol=None): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ threshold=0.5, thresholds=None, probabilityCol="probability", \ rawPredictionCol="rawPrediction", standardization=True, weightCol=None) Sets params for logistic regression. -- cgit v1.2.3