about | summary | refs | log | tree | commit | diff
path: root/python
diff options
context:
space:
mode:
author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> 2016-02-25 13:29:10 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-02-25 13:29:10 -0800
commit: 35316cb0b744bef9bcb390411ddc321167f953be (patch)
tree: 3ed75c8c6e20cb0a5468925416f70cd86a2933c2 /python
parent: 14e2700de29d06460179a94cc9816bcd37344cf7 (diff)
download: spark-35316cb0b744bef9bcb390411ddc321167f953be.tar.gz
spark-35316cb0b744bef9bcb390411ddc321167f953be.tar.bz2
spark-35316cb0b744bef9bcb390411ddc321167f953be.zip
[SPARK-13292] [ML] [PYTHON] QuantileDiscretizer should take random seed in PySpark
## What changes were proposed in this pull request?

QuantileDiscretizer in Python should also specify a random seed.

## How was this patch tested?

unit tests

Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #11362 from yu-iskw/SPARK-13292 and squashes the following commits:

02ffa76 [Yu ISHIKAWA] [SPARK-13292][ML][PYTHON] QuantileDiscretizer should take random seed in PySpark
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/ml/feature.py 14
1 file changed, 8 insertions, 6 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 464c9446f2..67bccfae7a 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -939,7 +939,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc
-class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
+class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, HasSeed):
"""
.. note:: Experimental
@@ -951,7 +951,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
>>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
>>> qds = QuantileDiscretizer(numBuckets=2,
- ... inputCol="values", outputCol="buckets")
+ ... inputCol="values", outputCol="buckets", seed=123)
+ >>> qds.getSeed()
+ 123
>>> bucketizer = qds.fit(df)
>>> splits = bucketizer.getSplits()
>>> splits[0]
@@ -971,9 +973,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
"categories) into which data points are grouped. Must be >= 2. Default 2.")
@keyword_only
- def __init__(self, numBuckets=2, inputCol=None, outputCol=None):
+ def __init__(self, numBuckets=2, inputCol=None, outputCol=None, seed=None):
"""
- __init__(self, numBuckets=2, inputCol=None, outputCol=None)
+ __init__(self, numBuckets=2, inputCol=None, outputCol=None, seed=None)
"""
super(QuantileDiscretizer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.QuantileDiscretizer",
@@ -987,9 +989,9 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol):
@keyword_only
@since("2.0.0")
- def setParams(self, numBuckets=2, inputCol=None, outputCol=None):
+ def setParams(self, numBuckets=2, inputCol=None, outputCol=None, seed=None):
"""
- setParams(self, numBuckets=2, inputCol=None, outputCol=None)
+ setParams(self, numBuckets=2, inputCol=None, outputCol=None, seed=None)
Set the params for the QuantileDiscretizer
"""
kwargs = self.setParams._input_kwargs