about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/clustering.py
diff options
context:
space:
mode:
author: Yanbo Liang <ybliang8@gmail.com> 2016-04-01 12:53:39 -0700
committer: Xiangrui Meng <meng@databricks.com> 2016-04-01 12:53:39 -0700
commit: 381358fbe9afbe205299cbbea4c43148e2e69468 (patch)
tree: 8009263fab218b8d1b5b7ccf3d4ca718886207fd /python/pyspark/ml/clustering.py
parent: 8ba2b7f28fee39c4839e5ea125bd25f5091a3a1e (diff)
download: spark-381358fbe9afbe205299cbbea4c43148e2e69468.tar.gz
spark-381358fbe9afbe205299cbbea4c43148e2e69468.tar.bz2
spark-381358fbe9afbe205299cbbea4c43148e2e69468.zip
[SPARK-14305][ML][PYSPARK] PySpark ml.clustering BisectingKMeans support export/import
## What changes were proposed in this pull request?

PySpark ml.clustering BisectingKMeans support export/import

## How was this patch tested?

doc test.

cc jkbradley

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #12112 from yanboliang/spark-14305.
Diffstat (limited to 'python/pyspark/ml/clustering.py')
-rw-r--r-- python/pyspark/ml/clustering.py 17
1 files changed, 15 insertions, 2 deletions
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index e22d5c8ea4..f071c597c8 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -171,7 +171,7 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
return self.getOrDefault(self.initSteps)
-class BisectingKMeansModel(JavaModel):
+class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
.. note:: Experimental
@@ -195,7 +195,8 @@ class BisectingKMeansModel(JavaModel):
@inherit_doc
-class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasSeed):
+class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasSeed,
+ JavaMLWritable, JavaMLReadable):
"""
.. note:: Experimental
@@ -225,6 +226,18 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte
True
>>> rows[2].prediction == rows[3].prediction
True
+ >>> bkm_path = temp_path + "/bkm"
+ >>> bkm.save(bkm_path)
+ >>> bkm2 = BisectingKMeans.load(bkm_path)
+ >>> bkm2.getK()
+ 2
+ >>> model_path = temp_path + "/bkm_model"
+ >>> model.save(model_path)
+ >>> model2 = BisectingKMeansModel.load(model_path)
+ >>> model.clusterCenters()[0] == model2.clusterCenters()[0]
+ array([ True, True], dtype=bool)
+ >>> model.clusterCenters()[1] == model2.clusterCenters()[1]
+ array([ True, True], dtype=bool)
.. versionadded:: 2.0.0
"""