aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/clustering.py
diff options
context:
space:
mode:
author Yanbo Liang <ybliang8@gmail.com> 2015-08-12 13:24:18 -0700
committer Joseph K. Bradley <joseph@databricks.com> 2015-08-12 13:24:18 -0700
commit 762bacc16ac5e74c8b05a7c1e3e367d1d1633cef (patch)
tree da72f2717842672fcdbe092947284c0b4f009cf2 /python/pyspark/ml/clustering.py
parent 60103ecd3d9c92709a5878be7ebd57012813ab48 (diff)
download spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.tar.gz
spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.tar.bz2
spark-762bacc16ac5e74c8b05a7c1e3e367d1d1633cef.zip
[SPARK-9766] [ML] [PySpark] Check and add missing docs for PySpark ML
Check for and add missing docs for PySpark ML (this issue only checks for missing docs in o.a.s.ml, not o.a.s.mllib). Author: Yanbo Liang <ybliang8@gmail.com> Closes #8059 from yanboliang/SPARK-9766.
Diffstat (limited to 'python/pyspark/ml/clustering.py')
-rw-r--r-- python/pyspark/ml/clustering.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index b5e9b6549d..48338713a2 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -37,7 +37,9 @@ class KMeansModel(JavaModel):
@inherit_doc
class KMeans(JavaEstimator, HasFeaturesCol, HasMaxIter, HasSeed):
"""
- K-means Clustering
+ K-means clustering with support for multiple parallel runs and a k-means++ like initialization
+ mode (the k-means|| algorithm by Bahmani et al). When multiple concurrent runs are requested,
+ they are executed together with joint passes over the data for efficiency.
>>> from pyspark.mllib.linalg import Vectors
>>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),