From 762bacc16ac5e74c8b05a7c1e3e367d1d1633cef Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Wed, 12 Aug 2015 13:24:18 -0700
Subject: [SPARK-9766] [ML] [PySpark] check and add miss docs for PySpark ML

Check for and add missing docs for PySpark ML (this issue only checks for missing docs in o.a.s.ml, not o.a.s.mllib).

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #8059 from yanboliang/SPARK-9766.
---
 python/pyspark/ml/classification.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'python/pyspark/ml/classification.py')

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 5978d8f4d3..6702dce554 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -34,6 +34,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
                          HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol):
     """
     Logistic regression.
+    Currently, this class only supports binary classification.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.mllib.linalg import Vectors
@@ -96,8 +97,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
         #  is an L2 penalty. For alpha = 1, it is an L1 penalty.
         self.elasticNetParam = \
             Param(self, "elasticNetParam",
-                  "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " +
-                  "is an L2 penalty. For alpha = 1, it is an L1 penalty.")
+                  "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
+                  "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
         #: param for whether to fit an intercept term.
         self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.")
         #: param for threshold in binary classification prediction, in range [0, 1].
@@ -656,6 +657,13 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
                  HasRawPredictionCol):
     """
     Naive Bayes Classifiers.
+    It supports both Multinomial and Bernoulli NB. Multinomial NB
+    (`http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html`)
+    can handle finitely supported discrete data. For example, by converting documents into
+    TF-IDF vectors, it can be used for document classification. By making every vector
+    binary (0/1) data, it can also be used as Bernoulli NB
+    (`http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html`).
+    The input feature values must be nonnegative.
 
     >>> from pyspark.sql import Row
     >>> from pyspark.mllib.linalg import Vectors
-- 
cgit v1.2.3