diff options
Diffstat (limited to 'python/pyspark/ml/clustering.py')
-rw-r--r-- | python/pyspark/ml/clustering.py | 11 |
1 file changed, 4 insertions, 7 deletions
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 9740ec45af..16ce02ee7d 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -39,8 +39,9 @@ class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable):
     @since("2.0.0")
     def weights(self):
         """
-        Weights for each Gaussian distribution in the mixture, where weights[i] is
-        the weight for Gaussian i, and weights.sum == 1.
+        Weight for each Gaussian distribution in the mixture.
+        This is a multinomial probability distribution over the k Gaussians,
+        where weights[i] is the weight for Gaussian i, and weights sum to 1.
         """
         return self._call_java("weights")
 
@@ -50,11 +51,7 @@ class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable):
         """
         Retrieve Gaussian distributions as a DataFrame.
         Each row represents a Gaussian Distribution.
-        Two columns are defined: mean and cov.
-        Schema:
-        root
-        -- mean: vector (nullable = true)
-        -- cov: matrix (nullable = true)
+        The DataFrame has two columns: mean (Vector) and cov (Matrix).
         """
         return self._call_java("gaussiansDF")
 