From d281b0bafe6aa23085d4d2b68f0ce321f1978b50 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 22 Jun 2016 11:54:49 +0200 Subject: [SPARK-15162][SPARK-15164][PYSPARK][DOCS][ML] update some pydocs ## What changes were proposed in this pull request? Mark ml.classification algorithms as experimental to match Scala algorithms, update PyDoc for thresholds on `LogisticRegression` to have same level of info as Scala, and enable mathjax for PyDoc. ## How was this patch tested? Built docs locally & PySpark SQL tests Author: Holden Karau Closes #12938 from holdenk/SPARK-15162-SPARK-15164-update-some-pydocs. --- python/pyspark/ml/classification.py | 38 +++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'python/pyspark/ml/classification.py') diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index e86c27ecaf..d6d713ca53 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -49,6 +49,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Logistic regression. Currently, this class only supports binary classification. @@ -96,7 +98,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti threshold = Param(Params._dummy(), "threshold", "Threshold in binary classification prediction, in range [0, 1]." + - " If threshold and thresholds are both set, they must match.", + " If threshold and thresholds are both set, they must match." + + "e.g. 
if threshold is p, then thresholds must be equal to [1-p, p].", typeConverter=TypeConverters.toFloat) @keyword_only @@ -154,7 +157,12 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti @since("1.4.0") def getThreshold(self): """ - Gets the value of threshold or its default value. + Get threshold for binary classification. + + If :py:attr:`thresholds` is set with length 2 (i.e., binary classification), + this returns the equivalent threshold: + :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`. + Otherwise, returns :py:attr:`threshold` if set or its default value if unset. """ self._checkThresholdConsistency() if self.isSet(self.thresholds): @@ -208,6 +216,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by LogisticRegression. .. versionadded:: 1.3.0 @@ -491,6 +501,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + `Decision tree `_ learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical @@ -587,6 +599,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred @inherit_doc class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by DecisionTreeClassifier. .. versionadded:: 1.4.0 @@ -620,6 +634,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred RandomForestParams, TreeClassifierParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + `Random Forest `_ learning algorithm for classification. 
It supports both binary and multiclass labels, as well as both continuous and categorical @@ -714,6 +730,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by RandomForestClassifier. .. versionadded:: 1.4.0 @@ -746,6 +764,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + `Gradient-Boosted Trees (GBTs) `_ learning algorithm for classification. It supports binary labels, as well as both continuous and categorical features. @@ -865,6 +885,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by GBTClassifier. .. versionadded:: 1.4.0 @@ -896,6 +918,8 @@ class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable) class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, HasThresholds, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Naive Bayes Classifiers. It supports both Multinomial and Bernoulli NB. `Multinomial NB `_ @@ -1019,6 +1043,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by NaiveBayes. .. versionadded:: 1.5.0 @@ -1046,6 +1072,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Classifier trainer based on the Multilayer Perceptron. Each layer has sigmoid activation function, output layer has softmax. 
Number of inputs has to be equal to the size of feature vectors. @@ -1216,6 +1244,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by MultilayerPerceptronClassifier. .. versionadded:: 1.6.0 @@ -1265,6 +1295,8 @@ class OneVsRestParams(HasFeaturesCol, HasLabelCol, HasPredictionCol): @inherit_doc class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable): """ + .. note:: Experimental + Reduction of Multiclass Classification to Binary Classification. Performs reduction using one against all strategy. For a multiclass classification with k classes, train k models (one per class). @@ -1419,6 +1451,8 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable): class OneVsRestModel(Model, OneVsRestParams, MLReadable, MLWritable): """ + .. note:: Experimental + Model fitted by OneVsRest. This stores the models resulting from training k binary classifiers: one for each class. Each example is scored against all k models, and the model with the highest score -- cgit v1.2.3