about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml
diff options
context:
space:
mode:
author Holden Karau <holden@us.ibm.com> 2016-06-22 11:54:49 +0200
committer Nick Pentreath <nickp@za.ibm.com> 2016-06-22 11:54:49 +0200
commit d281b0bafe6aa23085d4d2b68f0ce321f1978b50 (patch)
tree 549918d10890cbb2ec58ff1dab1e6d282be83fe9 /python/pyspark/ml
parent 0e3ce75332dd536c0db8467d456ad46e4bf228f4 (diff)
download spark-d281b0bafe6aa23085d4d2b68f0ce321f1978b50.tar.gz
spark-d281b0bafe6aa23085d4d2b68f0ce321f1978b50.tar.bz2
spark-d281b0bafe6aa23085d4d2b68f0ce321f1978b50.zip
[SPARK-15162][SPARK-15164][PYSPARK][DOCS][ML] update some pydocs
## What changes were proposed in this pull request?

Mark ml.classification algorithms as experimental to match Scala algorithms, update PyDoc for thresholds on `LogisticRegression` to have same level of info as Scala, and enable mathjax for PyDoc.

## How was this patch tested?

Built docs locally & PySpark SQL tests

Author: Holden Karau <holden@us.ibm.com>

Closes #12938 from holdenk/SPARK-15162-SPARK-15164-update-some-pydocs.
Diffstat (limited to 'python/pyspark/ml')
-rw-r--r-- python/pyspark/ml/classification.py 38
1 file changed, 36 insertions, 2 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index e86c27ecaf..d6d713ca53 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -49,6 +49,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds,
HasWeightCol, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Logistic regression.
Currently, this class only supports binary classification.
@@ -96,7 +98,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
threshold = Param(Params._dummy(), "threshold",
"Threshold in binary classification prediction, in range [0, 1]." +
- " If threshold and thresholds are both set, they must match.",
+ " If threshold and thresholds are both set, they must match." +
+ "e.g. if threshold is p, then thresholds must be equal to [1-p, p].",
typeConverter=TypeConverters.toFloat)
@keyword_only
@@ -154,7 +157,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
@since("1.4.0")
def getThreshold(self):
"""
- Gets the value of threshold or its default value.
+ Get threshold for binary classification.
+
+ If :py:attr:`thresholds` is set with length 2 (i.e., binary classification),
+ this returns the equivalent threshold:
+ :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`.
+ Otherwise, returns :py:attr:`threshold` if set or its default value if unset.
"""
self._checkThresholdConsistency()
if self.isSet(self.thresholds):
@@ -208,6 +216,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Model fitted by LogisticRegression.
.. versionadded:: 1.3.0
@@ -491,6 +501,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable,
JavaMLReadable):
"""
+ .. note:: Experimental
+
`Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
learning algorithm for classification.
It supports both binary and multiclass labels, as well as both continuous and categorical
@@ -587,6 +599,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
@inherit_doc
class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Model fitted by DecisionTreeClassifier.
.. versionadded:: 1.4.0
@@ -620,6 +634,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
RandomForestParams, TreeClassifierParams, HasCheckpointInterval,
JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
`Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
learning algorithm for classification.
It supports both binary and multiclass labels, as well as both continuous and categorical
@@ -714,6 +730,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Model fitted by RandomForestClassifier.
.. versionadded:: 1.4.0
@@ -746,6 +764,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
JavaMLReadable):
"""
+ .. note:: Experimental
+
`Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
learning algorithm for classification.
It supports binary labels, as well as both continuous and categorical features.
@@ -865,6 +885,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Model fitted by GBTClassifier.
.. versionadded:: 1.4.0
@@ -896,6 +918,8 @@ class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable)
class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol,
HasRawPredictionCol, HasThresholds, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Naive Bayes Classifiers.
It supports both Multinomial and Bernoulli NB. `Multinomial NB
<http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html>`_
@@ -1019,6 +1043,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Model fitted by NaiveBayes.
.. versionadded:: 1.5.0
@@ -1046,6 +1072,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable,
JavaMLReadable):
"""
+ .. note:: Experimental
+
Classifier trainer based on the Multilayer Perceptron.
Each layer has sigmoid activation function, output layer has softmax.
Number of inputs has to be equal to the size of feature vectors.
@@ -1216,6 +1244,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
+ .. note:: Experimental
+
Model fitted by MultilayerPerceptronClassifier.
.. versionadded:: 1.6.0
@@ -1265,6 +1295,8 @@ class OneVsRestParams(HasFeaturesCol, HasLabelCol, HasPredictionCol):
@inherit_doc
class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):
"""
+ .. note:: Experimental
+
Reduction of Multiclass Classification to Binary Classification.
Performs reduction using one against all strategy.
For a multiclass classification with k classes, train k models (one per class).
@@ -1419,6 +1451,8 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):
class OneVsRestModel(Model, OneVsRestParams, MLReadable, MLWritable):
"""
+ .. note:: Experimental
+
Model fitted by OneVsRest.
This stores the models resulting from training k binary classifiers: one for each class.
Each example is scored against all k models, and the model with the highest score