about summary refs log tree commit diff
path: root/python/pyspark/ml/param
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/ml/param')
-rw-r--r--python/pyspark/ml/param/__init__.py2
-rw-r--r--python/pyspark/ml/param/_shared_params_code_gen.py9
-rw-r--r--python/pyspark/ml/param/shared.py37
3 files changed, 13 insertions, 35 deletions
diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py
index 67fb6e3dc7..7845536161 100644
--- a/python/pyspark/ml/param/__init__.py
+++ b/python/pyspark/ml/param/__init__.py
@@ -147,7 +147,7 @@ class Params(Identifiable):
def getOrDefault(self, param):
"""
Gets the value of a param in the user-supplied param map or its
- default value. Raises an error if either is set.
+ default value. Raises an error if neither is set.
"""
param = self._resolveParam(param)
if param in self._paramMap:
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index 91e45ec373..ccb929af18 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -56,9 +56,10 @@ def _gen_param_header(name, doc, defaultValueStr):
def __init__(self):
super(Has$Name, self).__init__()
#: param for $doc
- self.$name = Param(self, "$name", "$doc")
- if $defaultValueStr is not None:
- self._setDefault($name=$defaultValueStr)'''
+ self.$name = Param(self, "$name", "$doc")'''
+ if defaultValueStr is not None:
+ template += '''
+ self._setDefault($name=$defaultValueStr)'''
Name = name[0].upper() + name[1:]
return template \
@@ -118,7 +119,7 @@ if __name__ == "__main__":
("outputCol", "output column name", None),
("numFeatures", "number of features", None),
("checkpointInterval", "checkpoint interval (>= 1)", None),
- ("seed", "random seed", None),
+ ("seed", "random seed", "hash(type(self).__name__)"),
("tol", "the convergence tolerance for iterative algorithms", None),
("stepSize", "Step size to be used for each iteration of optimization.", None)]
code = []
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index a5dc9b7ef2..0b93788899 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -32,8 +32,6 @@ class HasMaxIter(Params):
super(HasMaxIter, self).__init__()
#: param for max number of iterations (>= 0)
self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0)")
- if None is not None:
- self._setDefault(maxIter=None)
def setMaxIter(self, value):
"""
@@ -61,8 +59,6 @@ class HasRegParam(Params):
super(HasRegParam, self).__init__()
#: param for regularization parameter (>= 0)
self.regParam = Param(self, "regParam", "regularization parameter (>= 0)")
- if None is not None:
- self._setDefault(regParam=None)
def setRegParam(self, value):
"""
@@ -90,8 +86,7 @@ class HasFeaturesCol(Params):
super(HasFeaturesCol, self).__init__()
#: param for features column name
self.featuresCol = Param(self, "featuresCol", "features column name")
- if 'features' is not None:
- self._setDefault(featuresCol='features')
+ self._setDefault(featuresCol='features')
def setFeaturesCol(self, value):
"""
@@ -119,8 +114,7 @@ class HasLabelCol(Params):
super(HasLabelCol, self).__init__()
#: param for label column name
self.labelCol = Param(self, "labelCol", "label column name")
- if 'label' is not None:
- self._setDefault(labelCol='label')
+ self._setDefault(labelCol='label')
def setLabelCol(self, value):
"""
@@ -148,8 +142,7 @@ class HasPredictionCol(Params):
super(HasPredictionCol, self).__init__()
#: param for prediction column name
self.predictionCol = Param(self, "predictionCol", "prediction column name")
- if 'prediction' is not None:
- self._setDefault(predictionCol='prediction')
+ self._setDefault(predictionCol='prediction')
def setPredictionCol(self, value):
"""
@@ -177,8 +170,7 @@ class HasProbabilityCol(Params):
super(HasProbabilityCol, self).__init__()
#: param for Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.
self.probabilityCol = Param(self, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.")
- if 'probability' is not None:
- self._setDefault(probabilityCol='probability')
+ self._setDefault(probabilityCol='probability')
def setProbabilityCol(self, value):
"""
@@ -206,8 +198,7 @@ class HasRawPredictionCol(Params):
super(HasRawPredictionCol, self).__init__()
#: param for raw prediction (a.k.a. confidence) column name
self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name")
- if 'rawPrediction' is not None:
- self._setDefault(rawPredictionCol='rawPrediction')
+ self._setDefault(rawPredictionCol='rawPrediction')
def setRawPredictionCol(self, value):
"""
@@ -235,8 +226,6 @@ class HasInputCol(Params):
super(HasInputCol, self).__init__()
#: param for input column name
self.inputCol = Param(self, "inputCol", "input column name")
- if None is not None:
- self._setDefault(inputCol=None)
def setInputCol(self, value):
"""
@@ -264,8 +253,6 @@ class HasInputCols(Params):
super(HasInputCols, self).__init__()
#: param for input column names
self.inputCols = Param(self, "inputCols", "input column names")
- if None is not None:
- self._setDefault(inputCols=None)
def setInputCols(self, value):
"""
@@ -293,8 +280,6 @@ class HasOutputCol(Params):
super(HasOutputCol, self).__init__()
#: param for output column name
self.outputCol = Param(self, "outputCol", "output column name")
- if None is not None:
- self._setDefault(outputCol=None)
def setOutputCol(self, value):
"""
@@ -322,8 +307,6 @@ class HasNumFeatures(Params):
super(HasNumFeatures, self).__init__()
#: param for number of features
self.numFeatures = Param(self, "numFeatures", "number of features")
- if None is not None:
- self._setDefault(numFeatures=None)
def setNumFeatures(self, value):
"""
@@ -351,8 +334,6 @@ class HasCheckpointInterval(Params):
super(HasCheckpointInterval, self).__init__()
#: param for checkpoint interval (>= 1)
self.checkpointInterval = Param(self, "checkpointInterval", "checkpoint interval (>= 1)")
- if None is not None:
- self._setDefault(checkpointInterval=None)
def setCheckpointInterval(self, value):
"""
@@ -380,8 +361,7 @@ class HasSeed(Params):
super(HasSeed, self).__init__()
#: param for random seed
self.seed = Param(self, "seed", "random seed")
- if None is not None:
- self._setDefault(seed=None)
+ self._setDefault(seed=hash(type(self).__name__))
def setSeed(self, value):
"""
@@ -409,8 +389,6 @@ class HasTol(Params):
super(HasTol, self).__init__()
#: param for the convergence tolerance for iterative algorithms
self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms")
- if None is not None:
- self._setDefault(tol=None)
def setTol(self, value):
"""
@@ -438,8 +416,6 @@ class HasStepSize(Params):
super(HasStepSize, self).__init__()
#: param for Step size to be used for each iteration of optimization.
self.stepSize = Param(self, "stepSize", "Step size to be used for each iteration of optimization.")
- if None is not None:
- self._setDefault(stepSize=None)
def setStepSize(self, value):
"""
@@ -467,6 +443,7 @@ class DecisionTreeParams(Params):
minInfoGain = Param(Params._dummy(), "minInfoGain", "Minimum information gain for a split to be considered at a tree node.")
maxMemoryInMB = Param(Params._dummy(), "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.")
cacheNodeIds = Param(Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.")
+
def __init__(self):
super(DecisionTreeParams, self).__init__()