aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib/tests.py
diff options
context:
space:
mode:
author: Hrishikesh Subramonian <hrishikesh.subramonian@flytxt.com> 2015-05-05 07:57:39 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-05-05 07:57:39 -0700
commit: 5995ada96b661546a80657f2c5ed20604593e4aa (patch)
tree: 5ea7879e611aa34fd912d93e253ad69d0799a665 /python/pyspark/mllib/tests.py
parent: 5ab652cdb8bef10214edd079502a7f49017579aa (diff)
download: spark-5995ada96b661546a80657f2c5ed20604593e4aa.tar.gz
spark-5995ada96b661546a80657f2c5ed20604593e4aa.tar.bz2
spark-5995ada96b661546a80657f2c5ed20604593e4aa.zip
[SPARK-6612] [MLLIB] [PYSPARK] Python KMeans parity
The following items are added to Python kmeans:

kmeans - setEpsilon, setInitializationSteps
KMeansModel - computeCost, k

Author: Hrishikesh Subramonian <hrishikesh.subramonian@flytxt.com>

Closes #5647 from FlytxtRnD/newPyKmeansAPI and squashes the following commits:

b9e451b [Hrishikesh Subramonian] set seed to fixed value in doc test
5fd3ced [Hrishikesh Subramonian] doc test corrections
20b3c68 [Hrishikesh Subramonian] python 3 fixes
4d4e695 [Hrishikesh Subramonian] added arguments in python tests
21eb84c [Hrishikesh Subramonian] Python Kmeans - setEpsilon, setInitializationSteps, k and computeCost added.
Diffstat (limited to 'python/pyspark/mllib/tests.py')
-rw-r--r-- python/pyspark/mllib/tests.py 9
1 files changed, 6 insertions, 3 deletions
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 1d9c6ebf3b..d05cfe2af0 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -236,7 +236,8 @@ class ListTests(MLlibTestCase):
[1.1, 0],
[1.2, 0],
]
- clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||")
+ clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||",
+ initializationSteps=7, epsilon=1e-4)
self.assertEquals(clusters.predict(data[0]), clusters.predict(data[1]))
self.assertEquals(clusters.predict(data[2]), clusters.predict(data[3]))
@@ -246,9 +247,11 @@ class ListTests(MLlibTestCase):
Y = range(0, 100, 10)
data = [[x, y] for x, y in zip(X, Y)]
clusters1 = KMeans.train(self.sc.parallelize(data),
- 3, initializationMode="k-means||", seed=42)
+ 3, initializationMode="k-means||",
+ seed=42, initializationSteps=7, epsilon=1e-4)
clusters2 = KMeans.train(self.sc.parallelize(data),
- 3, initializationMode="k-means||", seed=42)
+ 3, initializationMode="k-means||",
+ seed=42, initializationSteps=7, epsilon=1e-4)
centers1 = clusters1.centers
centers2 = clusters2.centers
for c1, c2 in zip(centers1, centers2):