about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Evan Chen <chene@us.ibm.com> 2015-10-07 15:04:53 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2015-10-07 15:04:53 -0700
commit: da936fbb74b852d5c98286ce92522dc3efd6ad6c (patch)
tree: 99055e11626d7c3297a8a6fb3569b802b1c99c3e /mllib
parent: 713e4f44e92f25f1092c898923f77249934f38b2 (diff)
download: spark-da936fbb74b852d5c98286ce92522dc3efd6ad6c.tar.gz
spark-da936fbb74b852d5c98286ce92522dc3efd6ad6c.tar.bz2
spark-da936fbb74b852d5c98286ce92522dc3efd6ad6c.zip
[SPARK-10779] [PYSPARK] [MLLIB] Set initialModel for KMeans model in PySpark (spark.mllib)
Provide initialModel param for pyspark.mllib.clustering.KMeans.

Author: Evan Chen <chene@us.ibm.com>

Closes #8967 from evanyc15/SPARK-10779-pyspark-mllib.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 4
1 file changed, 3 insertions, 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 69ce7f5070..21e55938fa 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -336,7 +336,8 @@ private[python] class PythonMLLibAPI extends Serializable {
initializationMode: String,
seed: java.lang.Long,
initializationSteps: Int,
- epsilon: Double): KMeansModel = {
+ epsilon: Double,
+ initialModel: java.util.ArrayList[Vector]): KMeansModel = {
val kMeansAlg = new KMeans()
.setK(k)
.setMaxIterations(maxIterations)
@@ -346,6 +347,7 @@ private[python] class PythonMLLibAPI extends Serializable {
.setEpsilon(epsilon)
if (seed != null) kMeansAlg.setSeed(seed)
+ if (!initialModel.isEmpty()) kMeansAlg.setInitialModel(new KMeansModel(initialModel))
try {
kMeansAlg.run(data.rdd.persist(StorageLevel.MEMORY_AND_DISK))