From 5207a005cc86618907b8f467abc03eacef485ecd Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 12 May 2016 09:19:27 +0200 Subject: [SPARK-15281][PYSPARK][ML][TRIVIAL] Add impurity param to GBTRegressor & add experimental inside of regression.py ## What changes were proposed in this pull request? Add impurity param to GBTRegressor and mark the models & regressors in regression.py as experimental to match Scaladoc. ## How was this patch tested? Added default value to init, tested with unit/doc tests. Author: Holden Karau Closes #13071 from holdenk/SPARK-15281-GBTRegressor-impurity. --- python/pyspark/ml/regression.py | 52 ++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 8 deletions(-) (limited to 'python') diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index a2300fa49c..0d0eb8ae46 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -40,6 +40,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasSolver, HasWeightCol, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Linear regression. The learning objective is to minimize the squared error, with regularization. @@ -123,6 +125,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by LinearRegression. .. versionadded:: 1.4.0 @@ -631,6 +635,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol): """ + .. note:: Experimental + `Decision tree `_ learning algorithm for regression. It supports both continuous and categorical features. 
@@ -713,7 +719,10 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi @inherit_doc class DecisionTreeModel(JavaModel): - """Abstraction for Decision Tree models. + """ + .. note:: Experimental + + Abstraction for Decision Tree models. .. versionadded:: 1.5.0 """ @@ -736,7 +745,10 @@ class DecisionTreeModel(JavaModel): @inherit_doc class TreeEnsembleModels(JavaModel): - """Represents a tree ensemble model. + """ + .. note:: Experimental + + Represents a tree ensemble model. .. versionadded:: 1.5.0 """ @@ -754,6 +766,8 @@ class TreeEnsembleModels(JavaModel): @inherit_doc class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by DecisionTreeRegressor. .. versionadded:: 1.4.0 @@ -786,6 +800,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi RandomForestParams, TreeRegressorParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + `Random Forest `_ learning algorithm for regression. It supports both continuous and categorical features. @@ -868,6 +884,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by RandomForestRegressor. .. versionadded:: 1.4.0 @@ -892,8 +910,10 @@ class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLRead @inherit_doc class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, - JavaMLReadable): + JavaMLReadable, TreeRegressorParams): """ + .. note:: Experimental + `Gradient-Boosted Trees (GBTs) `_ learning algorithm for regression. It supports both continuous and categorical features. 
@@ -904,6 +924,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, ... (1.0, Vectors.dense(1.0)), ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) >>> gbt = GBTRegressor(maxIter=5, maxDepth=2, seed=42) + >>> print(gbt.getImpurity()) + variance >>> model = gbt.fit(df) >>> model.featureImportances SparseVector(1, {0: 1.0}) @@ -940,19 +962,21 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, - checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None): + checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, + impurity="variance"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \ - checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None) + checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \ + impurity="variance") """ super(GBTRegressor, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid) self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, - seed=None) + seed=None, impurity="variance") kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -961,12 +985,14 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, 
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, - checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None): + checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, + impurity="variance"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \ - checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None) + checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \ + impurity="variance") Sets params for Gradient Boosted Tree Regression. """ kwargs = self.setParams._input_kwargs @@ -992,6 +1018,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by GBTRegressor. .. versionadded:: 1.4.0 @@ -1017,6 +1045,8 @@ class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasFitIntercept, HasMaxIter, HasTol, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Accelerated Failure Time (AFT) Model Survival Regression Fit a parametric AFT survival regression model based on the Weibull distribution @@ -1157,6 +1187,8 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by AFTSurvivalRegression. .. versionadded:: 1.6.0 @@ -1204,6 +1236,8 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol, HasSolver, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Generalized Linear Regression. 
Fit a Generalized Linear Model specified by giving a symbolic description of the linear @@ -1320,6 +1354,8 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha class GeneralizedLinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ + .. note:: Experimental + Model fitted by GeneralizedLinearRegression. .. versionadded:: 2.0.0 -- cgit v1.2.3