From da936fbb74b852d5c98286ce92522dc3efd6ad6c Mon Sep 17 00:00:00 2001
From: Evan Chen
Date: Wed, 7 Oct 2015 15:04:53 -0700
Subject: [SPARK-10779] [PYSPARK] [MLLIB] Set initialModel for KMeans model in PySpark (spark.mllib)

Provide initialModel param for pyspark.mllib.clustering.KMeans

Author: Evan Chen

Closes #8967 from evanyc15/SPARK-10779-pyspark-mllib.
---
 .../main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'mllib')

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 69ce7f5070..21e55938fa 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -336,7 +336,8 @@ private[python] class PythonMLLibAPI extends Serializable {
       initializationMode: String,
       seed: java.lang.Long,
       initializationSteps: Int,
-      epsilon: Double): KMeansModel = {
+      epsilon: Double,
+      initialModel: java.util.ArrayList[Vector]): KMeansModel = {
     val kMeansAlg = new KMeans()
       .setK(k)
       .setMaxIterations(maxIterations)
@@ -346,6 +347,7 @@ private[python] class PythonMLLibAPI extends Serializable {
       .setEpsilon(epsilon)
 
     if (seed != null) kMeansAlg.setSeed(seed)
+    if (!initialModel.isEmpty()) kMeansAlg.setInitialModel(new KMeansModel(initialModel))
 
     try {
       kMeansAlg.run(data.rdd.persist(StorageLevel.MEMORY_AND_DISK))
-- 
cgit v1.2.3