about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/clustering.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/ml/clustering.py')
-rw-r--r-- python/pyspark/ml/clustering.py 11
1 files changed, 4 insertions, 7 deletions
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 9740ec45af..16ce02ee7d 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -39,8 +39,9 @@ class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable):
@since("2.0.0")
def weights(self):
"""
- Weights for each Gaussian distribution in the mixture, where weights[i] is
- the weight for Gaussian i, and weights.sum == 1.
+ Weight for each Gaussian distribution in the mixture.
+ This is a multinomial probability distribution over the k Gaussians,
+ where weights[i] is the weight for Gaussian i, and weights sum to 1.
"""
return self._call_java("weights")
@@ -50,11 +51,7 @@ class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
Retrieve Gaussian distributions as a DataFrame.
Each row represents a Gaussian Distribution.
- Two columns are defined: mean and cov.
- Schema:
- root
- -- mean: vector (nullable = true)
- -- cov: matrix (nullable = true)
+ The DataFrame has two columns: mean (Vector) and cov (Matrix).
"""
return self._call_java("gaussiansDF")