diff options
Diffstat (limited to 'mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index bee90fb3a5..a2150fbcc3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -42,6 +42,16 @@ import org.apache.spark.sql.functions._ * learning algorithm for classification. * It supports binary labels, as well as both continuous and categorical features. * Note: Multiclass labels are not currently supported. + * + * The implementation is based upon: J.H. Friedman. "Stochastic Gradient Boosting." 1999. + * + * Notes on Gradient Boosting vs. TreeBoost: + * - This implementation is for Stochastic Gradient Boosting, not for TreeBoost. + * - Both algorithms learn tree ensembles by minimizing loss functions. + * - TreeBoost (Friedman, 1999) additionally modifies the outputs at tree leaf nodes + * based on the loss function, whereas the original gradient boosting method does not. + * - We expect to implement TreeBoost in the future: + * [https://issues.apache.org/jira/browse/SPARK-4240] */ @Since("1.4.0") @Experimental |