path: root/python/pyspark/ml/param
author    Burak Yavuz <brkyvz@gmail.com>  2015-05-08 11:14:39 -0700
committer Xiangrui Meng <meng@databricks.com>  2015-05-08 11:14:39 -0700
commit    f5ff4a84c4c75143086aae7d38730156bee35933 (patch)
tree      dcd901a73199c865dcbf7e7f53a06f8c2d76c261 /python/pyspark/ml/param
parent    c796be70f36e262b6a2ce75924bd970f40bf4045 (diff)
[SPARK-7383] [ML] Feature Parity in PySpark for ml.features
Implemented Python wrappers for Scala functions that don't exist in `ml.features`.

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #5991 from brkyvz/ml-feat-PR and squashes the following commits:

adcca55 [Burak Yavuz] add regex tokenizer to __all__
b91cb44 [Burak Yavuz] addressed comments
bd39fd2 [Burak Yavuz] remove addition
b82bd7c [Burak Yavuz] Parity in PySpark for ml.features
Diffstat (limited to 'python/pyspark/ml/param')
-rw-r--r--  python/pyspark/ml/param/_shared_params_code_gen.py   4
-rw-r--r--  python/pyspark/ml/param/shared.py                   89
2 files changed, 92 insertions(+), 1 deletion(-)
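Each (name, doc, defaultValueStr) tuple added to the `shared` list in the first hunk is expanded by `_gen_param_code` into a full `Has<Name>` mixin class in `shared.py`; the second hunk is simply the regenerated output. A minimal sketch of that expansion, assuming plain string substitution (the template below is illustrative, not the actual one in `_shared_params_code_gen.py`):

    # Illustrative re-creation of the generator's expansion step: one
    # (name, doc, defaultValueStr) tuple becomes one Has<Name> mixin class.
    template = '''class Has{Name}(Params):
        """
        Mixin for param {name}: {doc}.
        """

        # a placeholder to make it appear in the generated doc
        {name} = Param(Params._dummy(), "{name}", "{doc}")

        def __init__(self):
            super(Has{Name}, self).__init__()
            self.{name} = Param(self, "{name}", "{doc}")
            if {default} is not None:
                self._setDefault({name}={default})
    '''

    def gen_param_code(name, doc, default_value_str):
        # Capitalize only the first letter so "stepSize" becomes "StepSize".
        cap = name[0].upper() + name[1:]
        return template.format(Name=cap, name=name, doc=doc,
                               default=default_value_str)

    print(gen_param_code("seed", "random seed", None))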
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index c1c8e921dd..ee901f2584 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -97,7 +97,9 @@ if __name__ == "__main__":
("inputCol", "input column name", None),
("inputCols", "input column names", None),
("outputCol", "output column name", None),
- ("numFeatures", "number of features", None)]
+ ("seed", "random seed", None),
+ ("tol", "the convergence tolerance for iterative algorithms", None),
+ ("stepSize", "Step size to be used for each iteration of optimization.", None)]
code = []
for name, doc, defaultValueStr in shared:
code.append(_gen_param_code(name, doc, defaultValueStr))
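Note the vacuous `if None is not None:` guard that recurs in the generated classes below: it falls out of the generator splicing `defaultValueStr` into the template as text, so a `None` default is written out literally and the `_setDefault` call is never reached. A one-line illustration of that substitution (hypothetical, mirroring the sketch above):

    # The default value is formatted into the guard as text; with
    # defaultValueStr=None the condition is constant-false dead code.
    guard = "if {0} is not None:\n    self._setDefault(seed={0})".format(None)
    print(guard)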
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index aaf80f0008..5e7529c1dc 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -308,3 +308,92 @@ class HasNumFeatures(Params):
         Gets the value of numFeatures or its default value.
         """
         return self.getOrDefault(self.numFeatures)
+
+
+class HasSeed(Params):
+    """
+    Mixin for param seed: random seed.
+    """
+
+    # a placeholder to make it appear in the generated doc
+    seed = Param(Params._dummy(), "seed", "random seed")
+
+    def __init__(self):
+        super(HasSeed, self).__init__()
+        #: param for random seed
+        self.seed = Param(self, "seed", "random seed")
+        if None is not None:
+            self._setDefault(seed=None)
+
+    def setSeed(self, value):
+        """
+        Sets the value of :py:attr:`seed`.
+        """
+        self.paramMap[self.seed] = value
+        return self
+
+    def getSeed(self):
+        """
+        Gets the value of seed or its default value.
+        """
+        return self.getOrDefault(self.seed)
+
+
+class HasTol(Params):
+    """
+    Mixin for param tol: the convergence tolerance for iterative algorithms.
+    """
+
+    # a placeholder to make it appear in the generated doc
+    tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms")
+
+    def __init__(self):
+        super(HasTol, self).__init__()
+        #: param for the convergence tolerance for iterative algorithms
+        self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms")
+        if None is not None:
+            self._setDefault(tol=None)
+
+    def setTol(self, value):
+        """
+        Sets the value of :py:attr:`tol`.
+        """
+        self.paramMap[self.tol] = value
+        return self
+
+    def getTol(self):
+        """
+        Gets the value of tol or its default value.
+        """
+        return self.getOrDefault(self.tol)
+
+
+class HasStepSize(Params):
+    """
+    Mixin for param stepSize: Step size to be used for each iteration of optimization..
+    """
+
+    # a placeholder to make it appear in the generated doc
+    stepSize = Param(Params._dummy(), "stepSize",
+                     "Step size to be used for each iteration of optimization.")
+
+    def __init__(self):
+        super(HasStepSize, self).__init__()
+        #: param for Step size to be used for each iteration of optimization.
+        self.stepSize = Param(self, "stepSize",
+                              "Step size to be used for each iteration of optimization.")
+        if None is not None:
+            self._setDefault(stepSize=None)
+
+    def setStepSize(self, value):
+        """
+        Sets the value of :py:attr:`stepSize`.
+        """
+        self.paramMap[self.stepSize] = value
+        return self
+
+    def getStepSize(self):
+        """
+        Gets the value of stepSize or its default value.
+        """
+        return self.getOrDefault(self.stepSize)
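Estimators pick these params up by inheritance rather than redefining them, and because every generated `__init__` calls `super().__init__()`, the mixins compose cleanly under Python's MRO. A usage sketch, assuming a PySpark build that includes this patch (`MyEstimator` is illustrative and not part of the commit):

    from pyspark.ml.param.shared import HasSeed, HasTol, HasStepSize

    # Hypothetical estimator: multiple inheritance from the generated mixins
    # provides the seed/tol/stepSize Params, their getters, and chainable
    # setters; cooperative super().__init__() calls register all three params.
    class MyEstimator(HasSeed, HasTol, HasStepSize):
        pass

    est = MyEstimator().setSeed(42).setTol(1e-6).setStepSize(0.1)
    print(est.getSeed(), est.getTol(), est.getStepSize())  # e.g. 42 1e-06 0.1

Each setter returns self, which is what makes the chained-call style above work; the getters go through getOrDefault, so they would fall back to a default value if one had been registered via _setDefault.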