aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/classification.py
diff options
context:
space:
mode:
authorFelix Cheung <felixcheung_m@hotmail.com>2016-10-30 16:21:37 -0700
committerFelix Cheung <felixcheung@apache.org>2016-10-30 16:21:37 -0700
commit7c3786929205b962b430cf7fc292602c2993c193 (patch)
tree5805b48f8f027a92f9dd3e99aca042eee99b4cef /python/pyspark/ml/classification.py
parentb6879b8b3518c71c23262554fcb0fdad60287011 (diff)
downloadspark-7c3786929205b962b430cf7fc292602c2993c193.tar.gz
spark-7c3786929205b962b430cf7fc292602c2993c193.tar.bz2
spark-7c3786929205b962b430cf7fc292602c2993c193.zip
[SPARK-18110][PYTHON][ML] add missing parameter in Python for RandomForest regression and classification
## What changes were proposed in this pull request? Add subsamplingRate to randomForestClassifier Add varianceCol to randomForestRegressor In Python ## How was this patch tested? manual tests Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #15638 from felixcheung/pyrandomforest.
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r--python/pyspark/ml/classification.py11
1 file changed, 6 insertions, 5 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 3f763a10d4..d9ff356b94 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -758,20 +758,21 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
probabilityCol="probability", rawPredictionCol="rawPrediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
- numTrees=20, featureSubsetStrategy="auto", seed=None):
+ numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
- numTrees=20, featureSubsetStrategy="auto", seed=None)
+ numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0)
"""
super(RandomForestClassifier, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.RandomForestClassifier", self.uid)
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
- impurity="gini", numTrees=20, featureSubsetStrategy="auto")
+ impurity="gini", numTrees=20, featureSubsetStrategy="auto",
+ subsamplingRate=1.0)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@@ -781,13 +782,13 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
probabilityCol="probability", rawPredictionCol="rawPrediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None,
- impurity="gini", numTrees=20, featureSubsetStrategy="auto"):
+ impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \
- impurity="gini", numTrees=20, featureSubsetStrategy="auto")
+ impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0)
Sets params for linear classification.
"""
kwargs = self.setParams._input_kwargs