From 302a18686998b8b96546526bfccec9cf5b667386 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 26 Apr 2016 11:55:21 -0700 Subject: [SPARK-11559][MLLIB] Make `runs` no effect in mllib.KMeans ## What changes were proposed in this pull request? We deprecated ```runs``` of mllib.KMeans in Spark 1.6 (SPARK-11358). In 2.0, we make it have no effect (with a warning message). We did not remove ```setRuns/getRuns``` for better binary compatibility. This PR changes the occurrences of `runs` that appear in the public API. Usage inside ```KMeans.runAlgorithm()``` will be resolved in #10806. ## How was this patch tested? Existing unit tests. cc jkbradley Author: Yanbo Liang Closes #12608 from yanboliang/spark-11559. --- python/pyspark/mllib/clustering.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'python/pyspark/mllib') diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 23d118bd40..95f7278dc6 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -179,7 +179,7 @@ class KMeansModel(Saveable, Loader): >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) >>> model = KMeans.train( - ... sc.parallelize(data), 2, maxIterations=10, runs=30, initializationMode="random", + ... sc.parallelize(data), 2, maxIterations=10, initializationMode="random", ... seed=50, initializationSteps=5, epsilon=1e-4) >>> model.predict(array([0.0, 0.0])) == model.predict(array([1.0, 1.0])) True @@ -323,9 +323,7 @@ class KMeans(object): Maximum number of iterations allowed. (default: 100) :param runs: - Number of runs to execute in parallel. The best model according - to the cost function will be returned (deprecated in 1.6.0). - (default: 1) + This param has no effect since Spark 2.0.0. :param initializationMode: The initialization algorithm. This can be either "random" or "k-means||". 
@@ -350,8 +348,7 @@ class KMeans(object): (default: None) """ if runs != 1: - warnings.warn( - "Support for runs is deprecated in 1.6.0. This param will have no effect in 2.0.0.") + warnings.warn("The param `runs` has no effect since Spark 2.0.0.") clusterInitialModel = [] if initialModel is not None: if not isinstance(initialModel, KMeansModel): -- cgit v1.2.3