[SPARK-13292] [ML] [PYTHON] QuantileDiscretizer should take random seed in PySpark

## What changes were proposed in this pull request?

QuantileDiscretizer in Python should also specify a random seed.

## How was this patch tested?

Unit tests.

Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #11362 from yu-iskw/SPARK-13292 and squashes the following commits:

02ffa76 [Yu ISHIKAWA] [SPARK-13292][ML][PYTHON] QuantileDiscretizer should take random seed in PySpark
author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> 2016-02-25 13:29:10 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-02-25 13:29:10 -0800
commit: 35316cb0b744bef9bcb390411ddc321167f953be (patch)
tree: 3ed75c8c6e20cb0a5468925416f70cd86a2933c2 /python
parent: 14e2700de29d06460179a94cc9816bcd37344cf7 (diff)
download: spark-35316cb0b744bef9bcb390411ddc321167f953be.tar.gz
spark-35316cb0b744bef9bcb390411ddc321167f953be.tar.bz2
spark-35316cb0b744bef9bcb390411ddc321167f953be.zip
1 file changed, 8 insertions, 6 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 464c9446f2..67bccfae7a 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -939,7 +939,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
 
 
 @inherit_doc
-class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
+class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, HasSeed):
     """
     .. note:: Experimental
 
@@ -951,7 +951,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
 
     >>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
     >>> qds = QuantileDiscretizer(numBuckets=2,
-    ...     inputCol="values", outputCol="buckets")
+    ...     inputCol="values", outputCol="buckets", seed=123)
+    >>> qds.getSeed()
+    123
     >>> bucketizer = qds.fit(df)
     >>> splits = bucketizer.getSplits()
     >>> splits[0]
@@ -971,9 +973,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
                        "categories) into which data points are grouped. Must be >= 2. Default 2.")
 
     @keyword_only
-    def __init__(self, numBuckets=2, inputCol=None, outputCol=None):
+    def __init__(self, numBuckets=2, inputCol=None, outputCol=None, seed=None):
         """
-        __init__(self, numBuckets=2, inputCol=None, outputCol=None)
+        __init__(self, numBuckets=2, inputCol=None, outputCol=None, seed=None)
         """
         super(QuantileDiscretizer, self).__init__()
         self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.QuantileDiscretizer",
@@ -987,9 +989,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
 
     @keyword_only
     @since("2.0.0")
-    def setParams(self, numBuckets=2, inputCol=None, outputCol=None):
+    def setParams(self, numBuckets=2, inputCol=None, outputCol=None, seed=None):
         """
-        setParams(self, numBuckets=2, inputCol=None, outputCol=None)
+        setParams(self, numBuckets=2, inputCol=None, outputCol=None, seed=None)
         Set the params for the QuantileDiscretizer
         """
         kwargs = self.setParams._input_kwargs
author	Yu ISHIKAWA <yuu.ishikawa@gmail.com>	2016-02-25 13:29:10 -0800
committer	Xiangrui Meng <meng@databricks.com>	2016-02-25 13:29:10 -0800
commit	35316cb0b744bef9bcb390411ddc321167f953be (patch)
tree	3ed75c8c6e20cb0a5468925416f70cd86a2933c2 /python
parent	14e2700de29d06460179a94cc9816bcd37344cf7 (diff)
download	spark-35316cb0b744bef9bcb390411ddc321167f953be.tar.gz spark-35316cb0b744bef9bcb390411ddc321167f953be.tar.bz2 spark-35316cb0b744bef9bcb390411ddc321167f953be.zip