aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/classification.py
diff options
context:
space:
mode:
author Yanbo Liang <ybliang8@gmail.com> 2015-08-12 13:24:18 -0700
committer Joseph K. Bradley <joseph@databricks.com> 2015-08-12 13:24:18 -0700
commit 762bacc16ac5e74c8b05a7c1e3e367d1d1633cef (patch)
tree da72f2717842672fcdbe092947284c0b4f009cf2 /python/pyspark/ml/classification.py
parent 60103ecd3d9c92709a5878be7ebd57012813ab48 (diff)
download spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.tar.gz
spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.tar.bz2
spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.zip
[SPARK-9766] [ML] [PySpark] check and add miss docs for PySpark ML
Check and add missing docs for PySpark ML (this issue only checks for missing docs in o.a.s.ml, not o.a.s.mllib). Author: Yanbo Liang <ybliang8@gmail.com>. Closes #8059 from yanboliang/SPARK-9766.
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r-- python/pyspark/ml/classification.py 12
1 files changed, 10 insertions, 2 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 5978d8f4d3..6702dce554 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -34,6 +34,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol):
"""
Logistic regression.
+ Currently, this class only supports binary classification.
>>> from pyspark.sql import Row
>>> from pyspark.mllib.linalg import Vectors
@@ -96,8 +97,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
# is an L2 penalty. For alpha = 1, it is an L1 penalty.
self.elasticNetParam = \
Param(self, "elasticNetParam",
- "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " +
- "is an L2 penalty. For alpha = 1, it is an L1 penalty.")
+ "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
+ "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.")
#: param for whether to fit an intercept term.
self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.")
#: param for threshold in binary classification prediction, in range [0, 1].
@@ -656,6 +657,13 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
HasRawPredictionCol):
"""
Naive Bayes Classifiers.
+ It supports both Multinomial and Bernoulli NB. Multinomial NB
+ (`http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html`)
+ can handle finitely supported discrete data. For example, by converting documents into
+ TF-IDF vectors, it can be used for document classification. By making every vector a
+ binary (0/1) data, it can also be used as Bernoulli NB
+ (`http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html`).
+ The input feature values must be nonnegative.
>>> from pyspark.sql import Row
>>> from pyspark.mllib.linalg import Vectors