about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Hrishikesh Subramonian <hrishikesh.subramonian@flytxt.com> 2015-05-05 07:57:39 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-05-05 07:57:39 -0700
commit: 5995ada96b661546a80657f2c5ed20604593e4aa (patch)
tree: 5ea7879e611aa34fd912d93e253ad69d0799a665 /mllib
parent: 5ab652cdb8bef10214edd079502a7f49017579aa (diff)
download: spark-5995ada96b661546a80657f2c5ed20604593e4aa.tar.gz
spark-5995ada96b661546a80657f2c5ed20604593e4aa.tar.bz2
spark-5995ada96b661546a80657f2c5ed20604593e4aa.zip
[SPARK-6612] [MLLIB] [PYSPARK] Python KMeans parity
The following items are added to Python kmeans:

kmeans - setEpsilon, setInitializationSteps
KMeansModel - computeCost, k

Author: Hrishikesh Subramonian <hrishikesh.subramonian@flytxt.com>

Closes #5647 from FlytxtRnD/newPyKmeansAPI and squashes the following commits:

b9e451b [Hrishikesh Subramonian] set seed to fixed value in doc test
5fd3ced [Hrishikesh Subramonian] doc test corrections
20b3c68 [Hrishikesh Subramonian] python 3 fixes
4d4e695 [Hrishikesh Subramonian] added arguments in python tests
21eb84c [Hrishikesh Subramonian] Python Kmeans - setEpsilon, setInitializationSteps, k and computeCost added.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 15
1 file changed, 14 insertions, 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 8e9a208d61..b086cec083 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -291,12 +291,16 @@ private[python] class PythonMLLibAPI extends Serializable {
maxIterations: Int,
runs: Int,
initializationMode: String,
- seed: java.lang.Long): KMeansModel = {
+ seed: java.lang.Long,
+ initializationSteps: Int,
+ epsilon: Double): KMeansModel = {
val kMeansAlg = new KMeans()
.setK(k)
.setMaxIterations(maxIterations)
.setRuns(runs)
.setInitializationMode(initializationMode)
+ .setInitializationSteps(initializationSteps)
+ .setEpsilon(epsilon)
if (seed != null) kMeansAlg.setSeed(seed)
@@ -308,6 +312,15 @@ private[python] class PythonMLLibAPI extends Serializable {
}
/**
+ * Java stub for Python mllib KMeansModel.computeCost()
+ */
+ def computeCostKmeansModel(
+ data: JavaRDD[Vector],
+ centers: java.util.ArrayList[Vector]): Double = {
+ new KMeansModel(centers).computeCost(data)
+ }
+
+ /**
* Java stub for Python mllib GaussianMixture.run()
* Returns a list containing weights, mean and covariance of each mixture component.
*/