From 0ddba6d88ff093a96b4931f71bd0a599afbbca78 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 19 Jan 2016 10:15:54 -0800 Subject: [SPARK-11944][PYSPARK][MLLIB] python mllib.clustering.bisecting k means From the coverage issues for 1.6: Add Python API for mllib.clustering.BisectingKMeans. Author: Holden Karau Closes #10150 from holdenk/SPARK-11937-python-api-coverage-SPARK-11944-python-mllib.clustering.BisectingKMeans. --- .../apache/spark/mllib/api/python/PythonMLLibAPI.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'mllib') diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 061db56c74..05f9a76d32 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -119,6 +119,23 @@ private[python] class PythonMLLibAPI extends Serializable { } } + /** + * Java stub for Python mllib BisectingKMeans.run() + */ + def trainBisectingKMeans( + data: JavaRDD[Vector], + k: Int, + maxIterations: Int, + minDivisibleClusterSize: Double, + seed: Long): BisectingKMeansModel = { + new BisectingKMeans() + .setK(k) + .setMaxIterations(maxIterations) + .setMinDivisibleClusterSize(minDivisibleClusterSize) + .setSeed(seed) + .run(data) + } + /** * Java stub for Python mllib LinearRegressionWithSGD.train() */ -- cgit v1.2.3