From 33ae7a35daa86c34f1f9f72f997e0c2d4cd8abec Mon Sep 17 00:00:00 2001
From: Xiangrui Meng
Date: Mon, 2 Nov 2015 13:42:16 -0800
Subject: [SPARK-11358][MLLIB] deprecate runs in k-means

This PR deprecates `runs` in k-means. `runs` introduces extra complexity and overhead in MLlib's k-means implementation. I haven't seen much usage with `runs` not equal to `1`. We don't have a unit test for it either. We can deprecate this method in 1.6, and void it in 1.7. It helps us simplify the implementation.

cc: srowen

Author: Xiangrui Meng

Closes #9322 from mengxr/SPARK-11358.
---
 python/pyspark/mllib/clustering.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'python')

diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index d1c3755a78..8629aa5a17 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -17,6 +17,7 @@
 
 import sys
 import array as pyarray
+import warnings
 
 if sys.version > '3':
     xrange = range
@@ -170,6 +171,9 @@ class KMeans(object):
     def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||",
               seed=None, initializationSteps=5, epsilon=1e-4, initialModel=None):
         """Train a k-means clustering model."""
+        if runs != 1:
+            warnings.warn(
+                "Support for runs is deprecated in 1.6.0. This param will have no effect in 1.7.0.")
         clusterInitialModel = []
         if initialModel is not None:
             if not isinstance(initialModel, KMeansModel):
-- 
cgit v1.2.3