diff options
author | Holden Karau <holden@us.ibm.com> | 2016-01-26 15:53:48 -0800 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2016-01-26 15:53:48 -0800 |
commit | eb917291ca1a2d68ca0639cb4b1464a546603eba (patch) | |
tree | 380dcaa33273baa68beaf089387bd498d5ee88e8 /python/pyspark/ml/classification.py | |
parent | 19fdb21afbf0eae4483cf6d4ef32daffd1994b89 (diff) | |
download | spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.gz spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.tar.bz2 spark-eb917291ca1a2d68ca0639cb4b1464a546603eba.zip |
[SPARK-10509][PYSPARK] Reduce excessive param boiler plate code
The current python ml params require cut-and-pasting the param setup and description between the class & ```__init__``` methods. Remove this possible case of errors & simplify use of custom params by adding a ```_copy_new_parent``` method to param so as to avoid cut and pasting (and cut and pasting at different indentation levels urgh).
Author: Holden Karau <holden@us.ibm.com>
Closes #10216 from holdenk/SPARK-10509-excessive-param-boiler-plate-code.
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r-- | python/pyspark/ml/classification.py | 32 |
1 file changed, 0 insertions, 32 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 265c6a14f1..3179fb30ab 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -72,7 +72,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti .. versionadded:: 1.3.0 """ - # a placeholder to make it appear in the generated doc threshold = Param(Params._dummy(), "threshold", "Threshold in binary classification prediction, in range [0, 1]." + " If threshold and thresholds are both set, they must match.") @@ -92,10 +91,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti super(LogisticRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LogisticRegression", self.uid) - #: param for threshold in binary classification, in range [0, 1]. - self.threshold = Param(self, "threshold", - "Threshold in binary classification prediction, in range [0, 1]." + - " If threshold and thresholds are both set, they must match.") self._setDefault(maxIter=100, regParam=0.1, tol=1E-6, threshold=0.5) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -232,7 +227,6 @@ class TreeClassifierParams(object): """ supportedImpurities = ["entropy", "gini"] - # a placeholder to make it appear in the generated doc impurity = Param(Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " + "Supported options: " + @@ -240,10 +234,6 @@ class TreeClassifierParams(object): def __init__(self): super(TreeClassifierParams, self).__init__() - #: param for Criterion used for information gain calculation (case-insensitive). - self.impurity = Param(self, "impurity", "Criterion used for information " + - "gain calculation (case-insensitive). 
Supported options: " + - ", ".join(self.supportedImpurities)) @since("1.6.0") def setImpurity(self, value): @@ -485,7 +475,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol .. versionadded:: 1.4.0 """ - # a placeholder to make it appear in the generated doc lossType = Param(Params._dummy(), "lossType", "Loss function which GBT tries to minimize (case-insensitive). " + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) @@ -504,10 +493,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol super(GBTClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.GBTClassifier", self.uid) - #: param for Loss function which GBT tries to minimize (case-insensitive). - self.lossType = Param(self, "lossType", - "Loss function which GBT tries to minimize (case-insensitive). " + - "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", maxIter=20, stepSize=0.1) @@ -597,7 +582,6 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H .. versionadded:: 1.5.0 """ - # a placeholder to make it appear in the generated doc smoothing = Param(Params._dummy(), "smoothing", "The smoothing parameter, should be >= 0, " + "default is 1.0") modelType = Param(Params._dummy(), "modelType", "The model type which is a string " + @@ -615,13 +599,6 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H super(NaiveBayes, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.NaiveBayes", self.uid) - #: param for the smoothing parameter. - self.smoothing = Param(self, "smoothing", "The smoothing parameter, should be >= 0, " + - "default is 1.0") - #: param for the model type. 
- self.modelType = Param(self, "modelType", "The model type which is a string " + - "(case-sensitive). Supported options: multinomial (default) " + - "and bernoulli.") self._setDefault(smoothing=1.0, modelType="multinomial") kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -734,7 +711,6 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, .. versionadded:: 1.6.0 """ - # a placeholder to make it appear in the generated doc layers = Param(Params._dummy(), "layers", "Sizes of layers from input layer to output layer " + "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 " + "neurons and output layer of 10 neurons, default is [1, 1].") @@ -753,14 +729,6 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, super(MultilayerPerceptronClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid) - self.layers = Param(self, "layers", "Sizes of layers from input layer to output layer " + - "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with " + - "100 neurons and output layer of 10 neurons, default is [1, 1].") - self.blockSize = Param(self, "blockSize", "Block size for stacking input data in " + - "matrices. Data is stacked within partitions. If block size is " + - "more than remaining data in a partition then it is adjusted to " + - "the size of this data. Recommended size is between 10 and 1000, " + - "default is 128.") self._setDefault(maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) |