aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib
diff options
context:
space:
mode:
author: Yanbo Liang <ybliang8@gmail.com> 2016-04-26 11:55:21 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2016-04-26 11:55:21 -0700
commit: 302a18686998b8b96546526bfccec9cf5b667386 (patch)
tree: aeea4d75fd873d030892aec5407137bc40e5a871 /python/pyspark/mllib
parent: 2a3d39f48b1a7bb462e17e80e243bbc0a94d802e (diff)
download: spark-302a18686998b8b96546526bfccec9cf5b667386.tar.gz
spark-302a18686998b8b96546526bfccec9cf5b667386.tar.bz2
spark-302a18686998b8b96546526bfccec9cf5b667386.zip
[SPARK-11559][MLLIB] Make `runs` no effect in mllib.KMeans
## What changes were proposed in this pull request? We deprecated `runs` of mllib.KMeans in Spark 1.6 (SPARK-11358). In 2.0, we make it have no effect (a warning message is emitted instead). We did not remove `setRuns`/`getRuns`, in order to preserve binary compatibility. This PR changes the places where `runs` appears in the public API. Usage inside `KMeans.runAlgorithm()` will be resolved in #10806. ## How was this patch tested? Existing unit tests. cc jkbradley Author: Yanbo Liang <ybliang8@gmail.com> Closes #12608 from yanboliang/spark-11559.
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- python/pyspark/mllib/clustering.py 9
1 files changed, 3 insertions, 6 deletions
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index 23d118bd40..95f7278dc6 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -179,7 +179,7 @@ class KMeansModel(Saveable, Loader):
>>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2)
>>> model = KMeans.train(
- ... sc.parallelize(data), 2, maxIterations=10, runs=30, initializationMode="random",
+ ... sc.parallelize(data), 2, maxIterations=10, initializationMode="random",
... seed=50, initializationSteps=5, epsilon=1e-4)
>>> model.predict(array([0.0, 0.0])) == model.predict(array([1.0, 1.0]))
True
@@ -323,9 +323,7 @@ class KMeans(object):
Maximum number of iterations allowed.
(default: 100)
:param runs:
- Number of runs to execute in parallel. The best model according
- to the cost function will be returned (deprecated in 1.6.0).
- (default: 1)
+ This param has no effect since Spark 2.0.0.
:param initializationMode:
The initialization algorithm. This can be either "random" or
"k-means||".
@@ -350,8 +348,7 @@ class KMeans(object):
(default: None)
"""
if runs != 1:
- warnings.warn(
- "Support for runs is deprecated in 1.6.0. This param will have no effect in 2.0.0.")
+ warnings.warn("The param `runs` has no effect since Spark 2.0.0.")
clusterInitialModel = []
if initialModel is not None:
if not isinstance(initialModel, KMeansModel):