diff options
author | Sean Owen <sowen@cloudera.com> | 2016-09-24 08:15:55 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-09-24 08:15:55 +0100 |
commit | 248916f5589155c0c3e93c3874781f17b08d598d (patch) | |
tree | 4e3183ffc5d59e09edc8b54ddc2af4fc67abb05b /python/pyspark | |
parent | f3fe55439e4c865c26502487a1bccf255da33f4a (diff) | |
download | spark-248916f5589155c0c3e93c3874781f17b08d598d.tar.gz spark-248916f5589155c0c3e93c3874781f17b08d598d.tar.bz2 spark-248916f5589155c0c3e93c3874781f17b08d598d.zip |
[SPARK-17057][ML] ProbabilisticClassifierModels' thresholds should have at most one 0
## What changes were proposed in this pull request?
Match ProbabilisticClassifier.thresholds requirements to R randomForest cutoff, requiring all values > 0 (this patch additionally permits at most one value to be 0)
## How was this patch tested?
Jenkins tests plus new test cases
Author: Sean Owen <sowen@cloudera.com>
Closes #15149 from srowen/SPARK-17057.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/ml/param/_shared_params_code_gen.py | 5 | ||||
-rw-r--r-- | python/pyspark/ml/param/shared.py | 4 |
2 files changed, 5 insertions, 4 deletions
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 4f4328bcad..929591236d 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -139,8 +139,9 @@ if __name__ == "__main__": "model.", "True", "TypeConverters.toBoolean"), ("thresholds", "Thresholds in multi-class classification to adjust the probability of " + "predicting each class. Array must have length equal to the number of classes, with " + - "values >= 0. The class with largest value p/t is predicted, where p is the original " + - "probability of that class and t is the class' threshold.", None, + "values > 0, excepting that at most one value may be 0. " + + "The class with largest value p/t is predicted, where p is the original " + + "probability of that class and t is the class's threshold.", None, "TypeConverters.toListFloat"), ("weightCol", "weight column name. If this is not set or empty, we treat " + "all instance weights as 1.0.", None, "TypeConverters.toString"), diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 24af07afc7..cc596936d8 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -469,10 +469,10 @@ class HasStandardization(Params): class HasThresholds(Params): """ - Mixin for param thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold. + Mixin for param thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0, excepting that at most one value may be 0. 
The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold. """ - thresholds = Param(Params._dummy(), "thresholds", "Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold.", typeConverter=TypeConverters.toListFloat) + thresholds = Param(Params._dummy(), "thresholds", "Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0, excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold.", typeConverter=TypeConverters.toListFloat) def __init__(self): super(HasThresholds, self).__init__() |