diff options
author | Burak Yavuz <brkyvz@gmail.com> | 2015-05-08 11:14:39 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-05-08 11:14:39 -0700 |
commit | f5ff4a84c4c75143086aae7d38730156bee35933 (patch) | |
tree | dcd901a73199c865dcbf7e7f53a06f8c2d76c261 /python/pyspark/ml/param | |
parent | c796be70f36e262b6a2ce75924bd970f40bf4045 (diff) | |
download | spark-f5ff4a84c4c75143086aae7d38730156bee35933.tar.gz spark-f5ff4a84c4c75143086aae7d38730156bee35933.tar.bz2 spark-f5ff4a84c4c75143086aae7d38730156bee35933.zip |
[SPARK-7383] [ML] Feature Parity in PySpark for ml.features
Implemented Python wrappers for Scala functions that did not yet have counterparts in PySpark's `ml.features`.
Author: Burak Yavuz <brkyvz@gmail.com>
Closes #5991 from brkyvz/ml-feat-PR and squashes the following commits:
adcca55 [Burak Yavuz] add regex tokenizer to __all__
b91cb44 [Burak Yavuz] addressed comments
bd39fd2 [Burak Yavuz] remove addition
b82bd7c [Burak Yavuz] Parity in PySpark for ml.features
Diffstat (limited to 'python/pyspark/ml/param')
-rw-r--r-- | python/pyspark/ml/param/_shared_params_code_gen.py | 4 | ||||
-rw-r--r-- | python/pyspark/ml/param/shared.py | 89 |
2 files changed, 92 insertions, 1 deletion
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index c1c8e921dd..ee901f2584 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -97,7 +97,9 @@ if __name__ == "__main__": ("inputCol", "input column name", None), ("inputCols", "input column names", None), ("outputCol", "output column name", None), - ("numFeatures", "number of features", None)] + ("seed", "random seed", None), + ("tol", "the convergence tolerance for iterative algorithms", None), + ("stepSize", "Step size to be used for each iteration of optimization.", None)] code = [] for name, doc, defaultValueStr in shared: code.append(_gen_param_code(name, doc, defaultValueStr)) diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index aaf80f0008..5e7529c1dc 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -308,3 +308,92 @@ class HasNumFeatures(Params): Gets the value of numFeatures or its default value. """ return self.getOrDefault(self.numFeatures) + + +class HasSeed(Params): + """ + Mixin for param seed: random seed. + """ + + # a placeholder to make it appear in the generated doc + seed = Param(Params._dummy(), "seed", "random seed") + + def __init__(self): + super(HasSeed, self).__init__() + #: param for random seed + self.seed = Param(self, "seed", "random seed") + if None is not None: + self._setDefault(seed=None) + + def setSeed(self, value): + """ + Sets the value of :py:attr:`seed`. + """ + self.paramMap[self.seed] = value + return self + + def getSeed(self): + """ + Gets the value of seed or its default value. + """ + return self.getOrDefault(self.seed) + + +class HasTol(Params): + """ + Mixin for param tol: the convergence tolerance for iterative algorithms. 
+ """ + + # a placeholder to make it appear in the generated doc + tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms") + + def __init__(self): + super(HasTol, self).__init__() + #: param for the convergence tolerance for iterative algorithms + self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms") + if None is not None: + self._setDefault(tol=None) + + def setTol(self, value): + """ + Sets the value of :py:attr:`tol`. + """ + self.paramMap[self.tol] = value + return self + + def getTol(self): + """ + Gets the value of tol or its default value. + """ + return self.getOrDefault(self.tol) + + +class HasStepSize(Params): + """ + Mixin for param stepSize: Step size to be used for each iteration of optimization.. + """ + + # a placeholder to make it appear in the generated doc + stepSize = Param(Params._dummy(), "stepSize", + "Step size to be used for each iteration of optimization.") + + def __init__(self): + super(HasStepSize, self).__init__() + #: param for Step size to be used for each iteration of optimization. + self.stepSize = Param(self, "stepSize", + "Step size to be used for each iteration of optimization.") + if None is not None: + self._setDefault(stepSize=None) + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self.paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) |