diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2015-08-12 13:24:18 -0700 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2015-08-12 13:24:18 -0700 |
commit | 762bacc16ac5e74c8b05a7c1e3e367d1d1633cef (patch) | |
tree | da72f2717842672fcdbe092947284c0b4f009cf2 /python/pyspark/ml/classification.py | |
parent | 60103ecd3d9c92709a5878be7ebd57012813ab48 (diff) | |
download | spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.tar.gz spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.tar.bz2 spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.zip |
[SPARK-9766] [ML] [PySpark] check and add missing docs for PySpark ML
Check and add missing docs for PySpark ML (this issue only covers missing docs for o.a.s.ml, not o.a.s.mllib).
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #8059 from yanboliang/SPARK-9766.
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r-- | python/pyspark/ml/classification.py | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 5978d8f4d3..6702dce554 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -34,6 +34,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol): """ Logistic regression. + Currently, this class only supports binary classification. >>> from pyspark.sql import Row >>> from pyspark.mllib.linalg import Vectors @@ -96,8 +97,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti # is an L2 penalty. For alpha = 1, it is an L1 penalty. self.elasticNetParam = \ Param(self, "elasticNetParam", - "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " + - "is an L2 penalty. For alpha = 1, it is an L1 penalty.") + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " + + "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.") #: param for whether to fit an intercept term. self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.") #: param for threshold in binary classification prediction, in range [0, 1]. @@ -656,6 +657,13 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H HasRawPredictionCol): """ Naive Bayes Classifiers. + It supports both Multinomial and Bernoulli NB. Multinomial NB + (`http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html`) + can handle finitely supported discrete data. For example, by converting documents into + TF-IDF vectors, it can be used for document classification. By making every vector a + binary (0/1) data, it can also be used as Bernoulli NB + (`http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html`). + The input feature values must be nonnegative. >>> from pyspark.sql import Row >>> from pyspark.mllib.linalg import Vectors |