diff options
author | Bryan Cutler <bjcutler@us.ibm.com> | 2015-08-18 14:58:30 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-08-18 14:58:30 -0700 |
commit | 1dbffba37a84c62202befd3911d25888f958191d (patch) | |
tree | 0aa21cdd020476a65d04495305374925f0051d21 | |
parent | 492ac1facbc79ee251d45cff315598ec9935a0e2 (diff) | |
download | spark-1dbffba37a84c62202befd3911d25888f958191d.tar.gz spark-1dbffba37a84c62202befd3911d25888f958191d.tar.bz2 spark-1dbffba37a84c62202befd3911d25888f958191d.zip |
[SPARK-8924] [MLLIB, DOCUMENTATION] Added @since tags to mllib.tree
Added `@since` tags to mllib.tree
Author: Bryan Cutler <bjcutler@us.ibm.com>
Closes #7380 from BryanCutler/sinceTag-mllibTree-8924.
24 files changed, 157 insertions(+), 1 deletion(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index cecd1fed89..e5200b86fd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -43,6 +43,7 @@ import org.apache.spark.util.random.XORShiftRandom * @param strategy The configuration parameters for the tree algorithm which specify the type * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. + * @since 1.0.0 */ @Experimental class DecisionTree (private val strategy: Strategy) extends Serializable with Logging { @@ -53,6 +54,7 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo * Method to train a decision tree model over an RDD * @param input Training data: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] * @return DecisionTreeModel that can be used for prediction + * @since 1.2.0 */ def run(input: RDD[LabeledPoint]): DecisionTreeModel = { // Note: random seed will not be used since numTrees = 1. @@ -62,6 +64,9 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo } } +/** + * @since 1.0.0 + */ object DecisionTree extends Serializable with Logging { /** @@ -79,6 +84,7 @@ object DecisionTree extends Serializable with Logging { * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. * @return DecisionTreeModel that can be used for prediction + * @since 1.0.0 */ def train(input: RDD[LabeledPoint], strategy: Strategy): DecisionTreeModel = { new DecisionTree(strategy).run(input) @@ -100,6 +106,7 @@ object DecisionTree extends Serializable with Logging { * @param maxDepth Maximum depth of the tree. 
* E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. * @return DecisionTreeModel that can be used for prediction + * @since 1.0.0 */ def train( input: RDD[LabeledPoint], @@ -127,6 +134,7 @@ object DecisionTree extends Serializable with Logging { * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. * @param numClasses number of classes for classification. Default value of 2. * @return DecisionTreeModel that can be used for prediction + * @since 1.2.0 */ def train( input: RDD[LabeledPoint], @@ -160,6 +168,7 @@ object DecisionTree extends Serializable with Logging { * E.g., an entry (n -> k) indicates that feature n is categorical * with k categories indexed from 0: {0, 1, ..., k-1}. * @return DecisionTreeModel that can be used for prediction + * @since 1.0.0 */ def train( input: RDD[LabeledPoint], @@ -192,6 +201,7 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction + * @since 1.1.0 */ def trainClassifier( input: RDD[LabeledPoint], @@ -207,6 +217,7 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] + * @since 1.1.0 */ def trainClassifier( input: JavaRDD[LabeledPoint], @@ -236,6 +247,7 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction + * @since 1.1.0 */ def trainRegressor( input: RDD[LabeledPoint], @@ -249,6 +261,7 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] + * @since 1.1.0 */ def trainRegressor( input: JavaRDD[LabeledPoint], diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index 9ce6faa137..1436170986 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -48,6 +48,7 @@ import org.apache.spark.storage.StorageLevel * for other loss functions. * * @param boostingStrategy Parameters for the gradient boosting algorithm. + * @since 1.2.0 */ @Experimental class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) @@ -57,6 +58,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * Method to train a gradient boosting model * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return a gradient boosted trees model that can be used for prediction + * @since 1.2.0 */ def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = { val algo = boostingStrategy.treeStrategy.algo @@ -74,6 +76,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#run]]. + * @since 1.2.0 */ def run(input: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = { run(input.rdd) @@ -88,6 +91,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * E.g., these two datasets could be created from an original dataset * by using [[org.apache.spark.rdd.RDD.randomSplit()]] * @return a gradient boosted trees model that can be used for prediction + * @since 1.4.0 */ def runWithValidation( input: RDD[LabeledPoint], @@ -111,6 +115,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#runWithValidation]]. 
+ * @since 1.4.0 */ def runWithValidation( input: JavaRDD[LabeledPoint], @@ -119,6 +124,9 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) } } +/** + * @since 1.2.0 + */ object GradientBoostedTrees extends Logging { /** @@ -129,6 +137,7 @@ object GradientBoostedTrees extends Logging { * For regression, labels are real numbers. * @param boostingStrategy Configuration options for the boosting algorithm. * @return a gradient boosted trees model that can be used for prediction + * @since 1.2.0 */ def train( input: RDD[LabeledPoint], @@ -138,6 +147,7 @@ object GradientBoostedTrees extends Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees$#train]] + * @since 1.2.0 */ def train( input: JavaRDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 069959976a..9f3230656a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -260,6 +260,9 @@ private class RandomForest ( } +/** + * @since 1.2.0 + */ object RandomForest extends Serializable with Logging { /** @@ -276,6 +279,7 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "sqrt". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainClassifier( input: RDD[LabeledPoint], @@ -313,6 +317,7 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. 
* @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainClassifier( input: RDD[LabeledPoint], @@ -332,6 +337,7 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainClassifier]] + * @since 1.2.0 */ def trainClassifier( input: JavaRDD[LabeledPoint], @@ -362,6 +368,7 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "onethird". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainRegressor( input: RDD[LabeledPoint], @@ -398,6 +405,7 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction + * @since 1.2.0 */ def trainRegressor( input: RDD[LabeledPoint], @@ -416,6 +424,7 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainRegressor]] + * @since 1.2.0 */ def trainRegressor( input: JavaRDD[LabeledPoint], @@ -433,6 +442,7 @@ object RandomForest extends Serializable with Logging { /** * List of supported feature subset sampling strategies. 
+ * @since 1.2.0 */ val supportedFeatureSubsetStrategies: Array[String] = Array("auto", "all", "sqrt", "log2", "onethird") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index b609925997..d9a49aa71f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Enum to select the algorithm for the decision tree + * @since 1.0.0 */ @Experimental object Algo extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index 50fe2ac53d..88e5f57e9a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -38,6 +38,7 @@ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} * validation input between two iterations is less than the validationTol * then stop. Ignored when * [[org.apache.spark.mllib.tree.GradientBoostedTrees.run()]] is used. + * @since 1.2.0 */ @Experimental case class BoostingStrategy( @@ -70,6 +71,9 @@ case class BoostingStrategy( } } +/** + * @since 1.2.0 + */ @Experimental object BoostingStrategy { @@ -77,6 +81,7 @@ object BoostingStrategy { * Returns default configuration for the boosting algorithm * @param algo Learning goal. 
Supported: "Classification" or "Regression" * @return Configuration for boosting algorithm + * @since 1.2.0 */ def defaultParams(algo: String): BoostingStrategy = { defaultParams(Algo.fromString(algo)) @@ -88,6 +93,7 @@ object BoostingStrategy { * [[org.apache.spark.mllib.tree.configuration.Algo.Classification]], * [[org.apache.spark.mllib.tree.configuration.Algo.Regression]] * @return Configuration for boosting algorithm + * @since 1.3.0 */ def defaultParams(algo: Algo): BoostingStrategy = { val treeStrategy = Strategy.defaultStrategy(algo) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index f4c8772327..0684cafa48 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Enum to describe whether a feature is "continuous" or "categorical" + * @since 1.0.0 */ @Experimental object FeatureType extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index 7da976e55a..2daa63c4d2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Enum for selecting the quantile calculation strategy + * @since 1.0.0 */ @Experimental object QuantileStrategy extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 
de2c784809..7ae25a88bf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -66,6 +66,7 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ * E.g. 10 means that the cache will get checkpointed every 10 updates. If * the checkpoint directory is not set in * [[org.apache.spark.SparkContext]], this setting is ignored. + * @since 1.0.0 */ @Experimental class Strategy ( @@ -83,16 +84,23 @@ class Strategy ( @BeanProperty var useNodeIdCache: Boolean = false, @BeanProperty var checkpointInterval: Int = 10) extends Serializable { + /** + * @since 1.2.0 + */ def isMulticlassClassification: Boolean = { algo == Classification && numClasses > 2 } + /** + * @since 1.2.0 + */ def isMulticlassWithCategoricalFeatures: Boolean = { isMulticlassClassification && (categoricalFeaturesInfo.size > 0) } /** * Java-friendly constructor for [[org.apache.spark.mllib.tree.configuration.Strategy]] + * @since 1.1.0 */ def this( algo: Algo, @@ -107,6 +115,7 @@ class Strategy ( /** * Sets Algorithm using a String. + * @since 1.2.0 */ def setAlgo(algo: String): Unit = algo match { case "Classification" => setAlgo(Classification) @@ -115,6 +124,7 @@ class Strategy ( /** * Sets categoricalFeaturesInfo using a Java Map. + * @since 1.2.0 */ def setCategoricalFeaturesInfo( categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]): Unit = { @@ -162,7 +172,10 @@ class Strategy ( s"$subsamplingRate") } - /** Returns a shallow copy of this instance. */ + /** + * Returns a shallow copy of this instance. 
+ * @since 1.2.0 + */ def copy: Strategy = { new Strategy(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain, @@ -170,12 +183,16 @@ class Strategy ( } } +/** + * @since 1.2.0 + */ @Experimental object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo "Classification" or "Regression" + * @since 1.2.0 */ def defaultStrategy(algo: String): Strategy = { defaultStrategy(Algo.fromString(algo)) @@ -184,6 +201,7 @@ object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo Algo.Classification or Algo.Regression + * @since 1.3.0 */ def defaultStrategy(algo: Algo): Strategy = algo match { case Algo.Classification => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 0768204c33..0b6c7266de 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -23,6 +23,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * :: Experimental :: * Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. 
+ * @since 1.0.0 */ @Experimental object Entropy extends Impurity { @@ -35,6 +36,7 @@ object Entropy extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { @@ -62,6 +64,7 @@ object Entropy extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = @@ -70,6 +73,7 @@ object Entropy extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. + * @since 1.1.0 */ def instance: this.type = this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index d0077db683..3b0be42883 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -24,6 +24,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during binary classification. 
+ * @since 1.0.0 */ @Experimental object Gini extends Impurity { @@ -34,6 +35,7 @@ object Gini extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { @@ -58,6 +60,7 @@ object Gini extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = @@ -66,6 +69,7 @@ object Gini extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. + * @since 1.1.0 */ def instance: this.type = this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 86cee7e430..dd29740005 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -25,6 +25,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * This trait is used for * (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]] * (b) calculating impurity values from sufficient statistics. 
+ * @since 1.0.0 */ @Experimental trait Impurity extends Serializable { @@ -35,6 +36,7 @@ trait Impurity extends Serializable { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi def calculate(counts: Array[Double], totalCount: Double): Double @@ -46,6 +48,7 @@ trait Impurity extends Serializable { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi def calculate(count: Double, sum: Double, sumSquares: Double): Double diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index 04d0cd24e6..adbe05811f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: * Class for calculating variance during regression + * @since 1.0.0 */ @Experimental object Variance extends Impurity { @@ -32,6 +33,7 @@ object Variance extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 + * @since 1.1.0 */ @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = @@ -44,6 +46,7 @@ object Variance extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 + * @since 1.0.0 */ @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = { @@ -57,6 +60,7 @@ object Variance extends Impurity { /** * Get this impurity instance. 
* This is useful for passing impurity parameters to a Strategy in Java. + * @since 1.0.0 */ def instance: this.type = this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala index 2bdef73c4a..c6e3d0d824 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The absolute (L1) error is defined as: * |y - F(x)| * where y is the label and F(x) is the model prediction for features x. + * @since 1.2.0 */ @DeveloperApi object AbsoluteError extends Loss { @@ -40,6 +41,7 @@ object AbsoluteError extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient + * @since 1.2.0 */ override def gradient(prediction: Double, label: Double): Double = { if (label - prediction < 0) 1.0 else -1.0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala index 778c24526d..eee58445a1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala @@ -31,6 +31,7 @@ import org.apache.spark.mllib.util.MLUtils * The log loss is defined as: * 2 log(1 + exp(-2 y F(x))) * where y is a label in {-1, 1} and F(x) is the model prediction for features x. + * @since 1.2.0 */ @DeveloperApi object LogLoss extends Loss { @@ -42,6 +43,7 @@ object LogLoss extends Loss { * @param prediction Predicted label. * @param label True label. 
* @return Loss gradient + * @since 1.2.0 */ override def gradient(prediction: Double, label: Double): Double = { - 4.0 * label / (1.0 + math.exp(2.0 * label * prediction)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala index 64ffccbce0..7c9fb92464 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: * Trait for adding "pluggable" loss functions for the gradient boosting algorithm. + * @since 1.2.0 */ @DeveloperApi trait Loss extends Serializable { @@ -35,6 +36,7 @@ trait Loss extends Serializable { * @param prediction Predicted feature * @param label true label. * @return Loss gradient. + * @since 1.2.0 */ def gradient(prediction: Double, label: Double): Double @@ -45,6 +47,7 @@ trait Loss extends Serializable { * @param model Model of the weak learner. * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. 
* @return Measure of model error on data + * @since 1.2.0 */ def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { data.map(point => computeError(model.predict(point.features), point.label)).mean() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala index 42c9ead988..47dc94cde7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala @@ -17,8 +17,14 @@ package org.apache.spark.mllib.tree.loss +/** + * @since 1.2.0 + */ object Losses { + /** + * @since 1.2.0 + */ def fromString(name: String): Loss = name match { case "leastSquaresError" => SquaredError case "leastAbsoluteError" => AbsoluteError diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala index 011a5d5742..ff8903d695 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The squared (L2) error is defined as: * (y - F(x))**2 * where y is the label and F(x) is the model prediction for features x. + * @since 1.2.0 */ @DeveloperApi object SquaredError extends Loss { @@ -40,6 +41,7 @@ object SquaredError extends Loss { * @param prediction Predicted label. * @param label True label. 
* @return Loss gradient + * @since 1.2.0 */ override def gradient(prediction: Double, label: Double): Double = { - 2.0 * (label - prediction) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index f2c78bbabf..0f386a2660 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -40,6 +40,7 @@ import org.apache.spark.util.Utils * This model stores the decision tree structure and parameters. * @param topNode root node * @param algo algorithm type -- classification or regression + * @since 1.0.0 */ @Experimental class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable with Saveable { @@ -49,6 +50,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features array representing a single data point * @return Double prediction from the trained model + * @since 1.0.0 */ def predict(features: Vector): Double = { topNode.predict(features) @@ -59,6 +61,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features RDD representing data points to be predicted * @return RDD of predictions for each of the given data points + * @since 1.0.0 */ def predict(features: RDD[Vector]): RDD[Double] = { features.map(x => predict(x)) @@ -69,6 +72,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features JavaRDD representing data points to be predicted * @return JavaRDD of predictions for each of the given data points + * @since 1.2.0 */ def predict(features: JavaRDD[Vector]): JavaRDD[Double] = { predict(features.rdd) @@ -76,6 +80,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Get number of nodes in tree, including leaf nodes. 
+ * @since 1.1.0 */ def numNodes: Int = { 1 + topNode.numDescendants @@ -84,6 +89,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Get depth of tree. * E.g.: Depth 0 means 1 leaf node. Depth 1 means 1 internal node and 2 leaf nodes. + * @since 1.1.0 */ def depth: Int = { topNode.subtreeDepth @@ -109,6 +115,12 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable header + topNode.subtreeToString(2) } + /** + * @param sc Spark context used to save model data. + * @param path Path specifying the directory in which to save this model. + * If the directory already exists, this method throws an exception. + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { DecisionTreeModel.SaveLoadV1_0.save(sc, path, this) } @@ -116,6 +128,9 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable override protected def formatVersion: String = DecisionTreeModel.formatVersion } +/** + * @since 1.3.0 + */ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { private[spark] def formatVersion: String = "1.0" @@ -297,6 +312,13 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { } } + /** + * + * @param sc Spark context used for loading model files. + * @param path Path specifying the directory to which the model was saved. 
+ * @return Model instance + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): DecisionTreeModel = { implicit val formats = DefaultFormats val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index 508bf9c1bd..23f0363639 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.tree.impurity.ImpurityCalculator * @param rightImpurity right node impurity * @param leftPredict left node predict * @param rightPredict right node predict + * @since 1.0.0 */ @DeveloperApi class InformationGainStats( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index a6d1398fc2..aca3350c2e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -38,6 +38,7 @@ import org.apache.spark.mllib.linalg.Vector * @param leftNode left child * @param rightNode right child * @param stats information gain stats + * @since 1.0.0 */ @DeveloperApi class Node ( @@ -58,6 +59,7 @@ class Node ( /** * build the left node and right nodes if not leaf * @param nodes array of nodes + * @since 1.0.0 */ @deprecated("build should no longer be used since trees are constructed on-the-fly in training", "1.2.0") @@ -79,6 +81,7 @@ class Node ( * predict value if node is not leaf * @param features feature value * @return predicted value + * @since 1.1.0 */ def predict(features: Vector) : Double = { if (isLeaf) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala index 5cbe7c280d..be819b59e7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala @@ -23,6 +23,7 @@ import org.apache.spark.annotation.DeveloperApi * Predicted value for a node * @param predict predicted value * @param prob probability of the label (classification only) + * @since 1.2.0 */ @DeveloperApi class Predict( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index be6c9b3de5..18d40530ae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -30,6 +30,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType * Split left if feature <= threshold, else right. * @param featureType type of feature -- categorical or continuous * @param categories Split left if categorical feature value is in this set, else right. 
+ * @since 1.0.0 */ @DeveloperApi case class Split( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 905c5fb42b..0c629b12a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -45,6 +45,7 @@ import org.apache.spark.util.Utils * * @param algo algorithm for the ensemble model, either Classification or Regression * @param trees tree ensembles + * @since 1.2.0 */ @Experimental class RandomForestModel(override val algo: Algo, override val trees: Array[DecisionTreeModel]) @@ -54,6 +55,13 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis require(trees.forall(_.algo == algo)) + /** + * + * @param sc Spark context used to save model data. + * @param path Path specifying the directory in which to save this model. + * If the directory already exists, this method throws an exception. + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this, RandomForestModel.SaveLoadV1_0.thisClassName) @@ -62,10 +70,20 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis override protected def formatVersion: String = RandomForestModel.formatVersion } +/** + * @since 1.3.0 + */ object RandomForestModel extends Loader[RandomForestModel] { private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion + /** + * + * @param sc Spark context used for loading model files. + * @param path Path specifying the directory to which the model was saved. 
+ * @return Model instance + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): RandomForestModel = { val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path) val classNameV1_0 = SaveLoadV1_0.thisClassName @@ -96,6 +114,7 @@ object RandomForestModel extends Loader[RandomForestModel] { * @param algo algorithm for the ensemble model, either Classification or Regression * @param trees tree ensembles * @param treeWeights tree ensemble weights + * @since 1.2.0 */ @Experimental class GradientBoostedTreesModel( @@ -107,6 +126,12 @@ class GradientBoostedTreesModel( require(trees.length == treeWeights.length) + /** + * @param sc Spark context used to save model data. + * @param path Path specifying the directory in which to save this model. + * If the directory already exists, this method throws an exception. + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this, GradientBoostedTreesModel.SaveLoadV1_0.thisClassName) @@ -118,6 +143,7 @@ class GradientBoostedTreesModel( * @param loss evaluation metric. * @return an array with index i having the losses or errors for the ensemble * containing the first i+1 trees + * @since 1.4.0 */ def evaluateEachIteration( data: RDD[LabeledPoint], @@ -159,6 +185,9 @@ class GradientBoostedTreesModel( override protected def formatVersion: String = GradientBoostedTreesModel.formatVersion } +/** + * @since 1.3.0 + */ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { /** @@ -170,6 +199,7 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { * @param loss: evaluation metric. * @return a RDD with each element being a zip of the prediction and error * corresponding to every sample. 
+ * @since 1.4.0 */ def computeInitialPredictionAndError( data: RDD[LabeledPoint], @@ -193,6 +223,7 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { * @param loss: evaluation metric. * @return a RDD with each element being a zip of the prediction and error * corresponding to each sample. + * @since 1.4.0 */ def updatePredictionError( data: RDD[LabeledPoint], @@ -213,6 +244,12 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion + /** + * @param sc Spark context used for loading model files. + * @param path Path specifying the directory to which the model was saved. + * @return Model instance + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): GradientBoostedTreesModel = { val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path) val classNameV1_0 = SaveLoadV1_0.thisClassName diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala index bcaacc1b1f..f520b3a1b7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala @@ -24,6 +24,7 @@ package org.apache.spark.mllib * - information loss calculation with entropy and Gini for classification and * variance for regression, * - both continuous and categorical features. + * @since 1.0.0 */ package object tree { } |