[SPARK-10509][PYSPARK] Reduce excessive param boilerplate code

The current Python ML params require copying and pasting the param setup and description between the class body and the `__init__` method. Remove this possible source of errors and simplify the use of custom params by adding a `_copy_new_parent` method to Param, which avoids the copy-and-paste (and copy-and-paste at different indentation levels). Author: Holden Karau <holden@us.ibm.com> Closes #10216 from holdenk/SPARK-10509-excessive-param-boiler-plate-code.
author: Holden Karau <holden@us.ibm.com> 2016-01-26 15:53:48 -0800
committer: Joseph K. Bradley <joseph@databricks.com> 2016-01-26 15:53:48 -0800
commit: eb917291ca1a2d68ca0639cb4b1464a546603eba (patch)
tree: 380dcaa33273baa68beaf089387bd498d5ee88e8 /python/pyspark/ml/clustering.py
parent: 19fdb21afbf0eae4483cf6d4ef32daffd1994b89 (diff)
download: spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.gz
spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.bz2
spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.zip
1 files changed, 0 insertions, 7 deletions
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 9189c02220..60d1c9aaec 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -73,7 +73,6 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
     .. versionadded:: 1.5.0
     """
 
-    # a placeholder to make it appear in the generated doc
     k = Param(Params._dummy(), "k", "number of clusters to create")
     initMode = Param(Params._dummy(), "initMode",
                      "the initialization algorithm. This can be either \"random\" to " +
@@ -90,12 +89,6 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
         """
         super(KMeans, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.KMeans", self.uid)
-        self.k = Param(self, "k", "number of clusters to create")
-        self.initMode = Param(self, "initMode",
-                              "the initialization algorithm. This can be either \"random\" to " +
-                              "choose random points as initial cluster centers, or \"k-means||\" " +
-                              "to use a parallel variant of k-means++")
-        self.initSteps = Param(self, "initSteps", "steps for k-means initialization mode")
         self._setDefault(k=2, initMode="k-means||", initSteps=5, tol=1e-4, maxIter=20)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)
author	Holden Karau <holden@us.ibm.com>	2016-01-26 15:53:48 -0800
committer	Joseph K. Bradley <joseph@databricks.com>	2016-01-26 15:53:48 -0800
commit	eb917291ca1a2d68ca0639cb4b1464a546603eba (patch)
tree	380dcaa33273baa68beaf089387bd498d5ee88e8 /python/pyspark/ml/clustering.py
parent	19fdb21afbf0eae4483cf6d4ef32daffd1994b89 (diff)
download	spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.gz spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.bz2 spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.zip