[SPARK-7383] [ML] Feature Parity in PySpark for ml.features

Implemented Python wrappers for Scala functions that did not yet have a PySpark counterpart in `ml.features`.

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #5991 from brkyvz/ml-feat-PR and squashes the following commits:

adcca55 [Burak Yavuz] add regex tokenizer to __all__
b91cb44 [Burak Yavuz] addressed comments
bd39fd2 [Burak Yavuz] remove addition
b82bd7c [Burak Yavuz] Parity in PySpark for ml.features
author: Burak Yavuz <brkyvz@gmail.com> 2015-05-08 11:14:39 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-05-08 11:14:39 -0700
commit: f5ff4a84c4c75143086aae7d38730156bee35933 (patch)
tree: dcd901a73199c865dcbf7e7f53a06f8c2d76c261 /python/pyspark/ml/param
parent: c796be70f36e262b6a2ce75924bd970f40bf4045 (diff)
download: spark-f5ff4a84c4c75143086aae7d38730156bee35933.tar.gz
spark-f5ff4a84c4c75143086aae7d38730156bee35933.tar.bz2
spark-f5ff4a84c4c75143086aae7d38730156bee35933.zip
2 files changed, 92 insertions, 1 deletions
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index c1c8e921dd..ee901f2584 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -97,7 +97,9 @@ if __name__ == "__main__":
         ("inputCol", "input column name", None),
         ("inputCols", "input column names", None),
         ("outputCol", "output column name", None),
-        ("numFeatures", "number of features", None)]
+        ("seed", "random seed", None),
+        ("tol", "the convergence tolerance for iterative algorithms", None),
+        ("stepSize", "Step size to be used for each iteration of optimization.", None)]
     code = []
     for name, doc, defaultValueStr in shared:
         code.append(_gen_param_code(name, doc, defaultValueStr))
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index aaf80f0008..5e7529c1dc 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -308,3 +308,92 @@ class HasNumFeatures(Params):
         Gets the value of numFeatures or its default value.
         """
         return self.getOrDefault(self.numFeatures)
+
+
+class HasSeed(Params):
+    """
+    Mixin for param seed: random seed.
+    """
+
+    # class-level placeholder so that seed appears in the generated doc; shadowed in __init__
+    seed = Param(Params._dummy(), "seed", "random seed")
+
+    def __init__(self):
+        super(HasSeed, self).__init__()
+        #: param for random seed, bound to this instance
+        self.seed = Param(self, "seed", "random seed")
+        if None is not None:  # never true as written, so _setDefault is never called here
+            self._setDefault(seed=None)
+
+    def setSeed(self, value):
+        """
+        Sets the value of :py:attr:`seed` in ``paramMap`` and returns this instance.
+        """
+        self.paramMap[self.seed] = value
+        return self
+
+    def getSeed(self):
+        """
+        Gets the value of seed or its default value.
+        """
+        return self.getOrDefault(self.seed)
+
+
+class HasTol(Params):
+    """
+    Mixin for param tol: the convergence tolerance for iterative algorithms.
+    """
+
+    # class-level placeholder so that tol appears in the generated doc; shadowed in __init__
+    tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms")
+
+    def __init__(self):
+        super(HasTol, self).__init__()
+        #: param for the convergence tolerance for iterative algorithms, bound to this instance
+        self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms")
+        if None is not None:  # never true as written, so _setDefault is never called here
+            self._setDefault(tol=None)
+
+    def setTol(self, value):
+        """
+        Sets the value of :py:attr:`tol` in ``paramMap`` and returns this instance.
+        """
+        self.paramMap[self.tol] = value
+        return self
+
+    def getTol(self):
+        """
+        Gets the value of tol or its default value.
+        """
+        return self.getOrDefault(self.tol)
+
+
+class HasStepSize(Params):
+    """
+    Mixin for param stepSize: Step size to be used for each iteration of optimization.
+    """
+
+    # class-level placeholder so that stepSize appears in the generated doc; shadowed in __init__
+    stepSize = Param(Params._dummy(), "stepSize",
+                     "Step size to be used for each iteration of optimization.")
+
+    def __init__(self):
+        super(HasStepSize, self).__init__()
+        #: param for Step size to be used for each iteration of optimization.
+        self.stepSize = Param(self, "stepSize",
+                              "Step size to be used for each iteration of optimization.")
+        if None is not None:  # never true as written, so _setDefault is never called here
+            self._setDefault(stepSize=None)
+
+    def setStepSize(self, value):
+        """
+        Sets the value of :py:attr:`stepSize` in ``paramMap`` and returns this instance.
+        """
+        self.paramMap[self.stepSize] = value
+        return self
+
+    def getStepSize(self):
+        """
+        Gets the value of stepSize or its default value.
+        """
+        return self.getOrDefault(self.stepSize)
author	Burak Yavuz <brkyvz@gmail.com>	2015-05-08 11:14:39 -0700
committer	Xiangrui Meng <meng@databricks.com>	2015-05-08 11:14:39 -0700
commit	f5ff4a84c4c75143086aae7d38730156bee35933 (patch)
tree	dcd901a73199c865dcbf7e7f53a06f8c2d76c261 /python/pyspark/ml/param
parent	c796be70f36e262b6a2ce75924bd970f40bf4045 (diff)
download	spark-f5ff4a84c4c75143086aae7d38730156bee35933.tar.gz spark-f5ff4a84c4c75143086aae7d38730156bee35933.tar.bz2 spark-f5ff4a84c4c75143086aae7d38730156bee35933.zip