aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/classification.py
diff options
context:
space:
mode:
authorFelix Cheung <felixcheung_m@hotmail.com>2016-10-30 16:21:37 -0700
committerFelix Cheung <felixcheung@apache.org>2016-10-30 16:21:37 -0700
commit7c3786929205b962b430cf7fc292602c2993c193 (patch)
tree5805b48f8f027a92f9dd3e99aca042eee99b4cef /python/pyspark/ml/classification.py
parentb6879b8b3518c71c23262554fcb0fdad60287011 (diff)
downloadspark-7c3786929205b962b430cf7fc292602c2993c193.tar.gz
spark-7c3786929205b962b430cf7fc292602c2993c193.tar.bz2
spark-7c3786929205b962b430cf7fc292602c2993c193.zip
[SPARK-18110][PYTHON][ML] add missing parameter in Python for RandomForest regression and classification
## What changes were proposed in this pull request? Add subsamplingRate to randomForestClassifier Add varianceCol to randomForestRegressor In Python ## How was this patch tested? manual tests Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #15638 from felixcheung/pyrandomforest.
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r--python/pyspark/ml/classification.py11
1 file changed, 6 insertions, 5 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 3f763a10d4..d9ff356b94 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -758,20 +758,21 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
probabilityCol="probability", rawPredictionCol="rawPrediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
- numTrees=20, featureSubsetStrategy="auto", seed=None):
+ numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
- numTrees=20, featureSubsetStrategy="auto", seed=None)
+ numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0)
"""
super(RandomForestClassifier, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.RandomForestClassifier", self.uid)
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
- impurity="gini", numTrees=20, featureSubsetStrategy="auto")
+ impurity="gini", numTrees=20, featureSubsetStrategy="auto",
+ subsamplingRate=1.0)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@@ -781,13 +782,13 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
probabilityCol="probability", rawPredictionCol="rawPrediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None,
- impurity="gini", numTrees=20, featureSubsetStrategy="auto"):
+ impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \
- impurity="gini", numTrees=20, featureSubsetStrategy="auto")
+ impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0)
Sets params for linear classification.
"""
kwargs = self.setParams._input_kwargs