aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: Holden Karau <holden@us.ibm.com>, 2016-01-19 10:15:54 -0800
committer: Joseph K. Bradley <joseph@databricks.com>, 2016-01-19 10:15:54 -0800
commit: 0ddba6d88ff093a96b4931f71bd0a599afbbca78 (patch)
tree: 331730923cc4d2d1bcd1b18a0c2829e4e5a6dbd5 /mllib
parent: ebd9ce0f1f55f7d2d3bd3b92c4b0a495c51ac6fd (diff)
download: spark-0ddba6d88ff093a96b4931f71bd0a599afbbca78.tar.gz
spark-0ddba6d88ff093a96b4931f71bd0a599afbbca78.tar.bz2
spark-0ddba6d88ff093a96b4931f71bd0a599afbbca78.zip
[SPARK-11944][PYSPARK][MLLIB] python mllib.clustering.bisecting k means
From the coverage issues for 1.6: Add Python API for mllib.clustering.BisectingKMeans.

Author: Holden Karau <holden@us.ibm.com>

Closes #10150 from holdenk/SPARK-11937-python-api-coverage-SPARK-11944-python-mllib.clustering.BisectingKMeans.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 17
1 files changed, 17 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 061db56c74..05f9a76d32 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -120,6 +120,23 @@ private[python] class PythonMLLibAPI extends Serializable {
}
/**
+ * Java stub for Python mllib BisectingKMeans.run()
+ */
+ def trainBisectingKMeans(
+ data: JavaRDD[Vector],
+ k: Int,
+ maxIterations: Int,
+ minDivisibleClusterSize: Double,
+ seed: Long): BisectingKMeansModel = {
+ new BisectingKMeans()
+ .setK(k)
+ .setMaxIterations(maxIterations)
+ .setMinDivisibleClusterSize(minDivisibleClusterSize)
+ .setSeed(seed)
+ .run(data)
+ }
+
+ /**
* Java stub for Python mllib LinearRegressionWithSGD.train()
*/
def trainLinearRegressionModelWithSGD(