aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/clustering.py
diff options
context:
space:
mode:
authorHolden Karau <holden@us.ibm.com>2016-01-26 15:53:48 -0800
committerJoseph K. Bradley <joseph@databricks.com>2016-01-26 15:53:48 -0800
commiteb917291ca1a2d68ca0639cb4b1464a546603eba (patch)
tree380dcaa33273baa68beaf089387bd498d5ee88e8 /python/pyspark/ml/clustering.py
parent19fdb21afbf0eae4483cf6d4ef32daffd1994b89 (diff)
downloadspark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.gz
spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.bz2
spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.zip
[SPARK-10509][PYSPARK] Reduce excessive param boiler plate code
The current python ml params require cut-and-pasting the param setup and description between the class & ```__init__``` methods. Remove this possible case of errors & simplify use of custom params by adding a ```_copy_new_parent``` method to param so as to avoid cut and pasting (and cut and pasting at different indentation levels urgh). Author: Holden Karau <holden@us.ibm.com> Closes #10216 from holdenk/SPARK-10509-excessive-param-boiler-plate-code.
Diffstat (limited to 'python/pyspark/ml/clustering.py')
-rw-r--r--python/pyspark/ml/clustering.py7
1 files changed, 0 insertions, 7 deletions
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 9189c02220..60d1c9aaec 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -73,7 +73,6 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
.. versionadded:: 1.5.0
"""
- # a placeholder to make it appear in the generated doc
k = Param(Params._dummy(), "k", "number of clusters to create")
initMode = Param(Params._dummy(), "initMode",
"the initialization algorithm. This can be either \"random\" to " +
@@ -90,12 +89,6 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
"""
super(KMeans, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.KMeans", self.uid)
- self.k = Param(self, "k", "number of clusters to create")
- self.initMode = Param(self, "initMode",
- "the initialization algorithm. This can be either \"random\" to " +
- "choose random points as initial cluster centers, or \"k-means||\" " +
- "to use a parallel variant of k-means++")
- self.initSteps = Param(self, "initSteps", "steps for k-means initialization mode")
self._setDefault(k=2, initMode="k-means||", initSteps=5, tol=1e-4, maxIter=20)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)