about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/param
diff options
context:
space:
mode:
author Burak Yavuz <brkyvz@gmail.com> 2015-05-13 15:13:09 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-05-13 15:13:09 -0700
commit df2fb1305aba6781017b0973b0965b664f835e31 (patch)
tree e0dce17f9e1e73831b550e395d81949fcfdecca0 /python/pyspark/ml/param
parent 61e05fc58e1245de871c409b60951745b5db3420 (diff)
download spark-df2fb1305aba6781017b0973b0965b664f835e31.tar.gz
spark-df2fb1305aba6781017b0973b0965b664f835e31.tar.bz2
spark-df2fb1305aba6781017b0973b0965b664f835e31.zip
[SPARK-7382] [MLLIB] Feature Parity in PySpark for ml.classification
The missing pieces in ml.classification for Python!

cc mengxr

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #6106 from brkyvz/ml-class and squashes the following commits:

dd78237 [Burak Yavuz] fix style
1048e29 [Burak Yavuz] ready for PR
Diffstat (limited to 'python/pyspark/ml/param')
-rw-r--r-- python/pyspark/ml/param/_shared_params_code_gen.py 4
-rw-r--r-- python/pyspark/ml/param/shared.py 29
2 files changed, 33 insertions, 0 deletions
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index 4a5cc6e64f..6fa9b8c2cf 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -109,6 +109,9 @@ if __name__ == "__main__":
("featuresCol", "features column name", "'features'"),
("labelCol", "label column name", "'label'"),
("predictionCol", "prediction column name", "'prediction'"),
+ ("probabilityCol", "Column name for predicted class conditional probabilities. " +
+ "Note: Not all models output well-calibrated probability estimates! These probabilities " +
+ "should be treated as confidences, not precise probabilities.", "'probability'"),
("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"),
("inputCol", "input column name", None),
("inputCols", "input column names", None),
@@ -156,6 +159,7 @@ if __name__ == "__main__":
for name, doc in decisionTreeParams:
variable = paramTemplate.replace("$name", name).replace("$doc", doc)
dummyPlaceholders += variable.replace("$owner", "Params._dummy()") + "\n "
+ realParams += "#: param for " + doc + "\n "
realParams += "self." + variable.replace("$owner", "self") + "\n "
dtParamMethods += _gen_param_code(name, doc, None) + "\n"
code.append(decisionTreeCode.replace("$dummyPlaceHolders", dummyPlaceholders)
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index 779cabe853..b116f05a06 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -165,6 +165,35 @@ class HasPredictionCol(Params):
return self.getOrDefault(self.predictionCol)
+class HasProbabilityCol(Params):
+ """
+ Mixin for param probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities..
+ """
+
+ # a placeholder to make it appear in the generated doc
+ probabilityCol = Param(Params._dummy(), "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.")
+
+ def __init__(self):
+ super(HasProbabilityCol, self).__init__()
+ #: param for Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.
+ self.probabilityCol = Param(self, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.")
+ if 'probability' is not None:
+ self._setDefault(probabilityCol='probability')
+
+ def setProbabilityCol(self, value):
+ """
+ Sets the value of :py:attr:`probabilityCol`.
+ """
+ self.paramMap[self.probabilityCol] = value
+ return self
+
+ def getProbabilityCol(self):
+ """
+ Gets the value of probabilityCol or its default value.
+ """
+ return self.getOrDefault(self.probabilityCol)
+
+
class HasRawPredictionCol(Params):
"""
Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name.