diff options
author | Burak Yavuz <brkyvz@gmail.com> | 2015-05-13 15:13:09 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-05-13 15:13:09 -0700 |
commit | df2fb1305aba6781017b0973b0965b664f835e31 (patch) | |
tree | e0dce17f9e1e73831b550e395d81949fcfdecca0 /python/pyspark/ml/param | |
parent | 61e05fc58e1245de871c409b60951745b5db3420 (diff) | |
download | spark-df2fb1305aba6781017b0973b0965b664f835e31.tar.gz spark-df2fb1305aba6781017b0973b0965b664f835e31.tar.bz2 spark-df2fb1305aba6781017b0973b0965b664f835e31.zip |
[SPARK-7382] [MLLIB] Feature Parity in PySpark for ml.classification
The missing pieces in ml.classification for Python!
cc mengxr
Author: Burak Yavuz <brkyvz@gmail.com>
Closes #6106 from brkyvz/ml-class and squashes the following commits:
dd78237 [Burak Yavuz] fix style
1048e29 [Burak Yavuz] ready for PR
Diffstat (limited to 'python/pyspark/ml/param')
-rw-r--r-- | python/pyspark/ml/param/_shared_params_code_gen.py | 4 |
-rw-r--r-- | python/pyspark/ml/param/shared.py | 29 |
2 files changed, 33 insertions, 0 deletions
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 4a5cc6e64f..6fa9b8c2cf 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -109,6 +109,9 @@ if __name__ == "__main__": ("featuresCol", "features column name", "'features'"), ("labelCol", "label column name", "'label'"), ("predictionCol", "prediction column name", "'prediction'"), + ("probabilityCol", "Column name for predicted class conditional probabilities. " + + "Note: Not all models output well-calibrated probability estimates! These probabilities " + + "should be treated as confidences, not precise probabilities.", "'probability'"), ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"), ("inputCol", "input column name", None), ("inputCols", "input column names", None), @@ -156,6 +159,7 @@ if __name__ == "__main__": for name, doc in decisionTreeParams: variable = paramTemplate.replace("$name", name).replace("$doc", doc) dummyPlaceholders += variable.replace("$owner", "Params._dummy()") + "\n " + realParams += "#: param for " + doc + "\n " realParams += "self." + variable.replace("$owner", "self") + "\n " dtParamMethods += _gen_param_code(name, doc, None) + "\n" code.append(decisionTreeCode.replace("$dummyPlaceHolders", dummyPlaceholders) diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 779cabe853..b116f05a06 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -165,6 +165,35 @@ class HasPredictionCol(Params): return self.getOrDefault(self.predictionCol) +class HasProbabilityCol(Params): + """ + Mixin for param probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.. 
+ """ + + # a placeholder to make it appear in the generated doc + probabilityCol = Param(Params._dummy(), "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.") + + def __init__(self): + super(HasProbabilityCol, self).__init__() + #: param for Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities. + self.probabilityCol = Param(self, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.") + if 'probability' is not None: + self._setDefault(probabilityCol='probability') + + def setProbabilityCol(self, value): + """ + Sets the value of :py:attr:`probabilityCol`. + """ + self.paramMap[self.probabilityCol] = value + return self + + def getProbabilityCol(self): + """ + Gets the value of probabilityCol or its default value. + """ + return self.getOrDefault(self.probabilityCol) + + class HasRawPredictionCol(Params): """ Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name. |