path: root/mllib
author     shikai.tang <tar.sky06@gmail.com>    2015-08-12 21:53:15 -0700
committer  Xiangrui Meng <meng@databricks.com>  2015-08-12 21:53:15 -0700
commit     df543892122342b97e5137b266959ba97589b3ef (patch)
tree       525171bf0465cc95e220bfa30b672a5488428a18 /mllib
parent     5fc058a1fc5d83ad53feec936475484aef3800b3 (diff)
download   spark-df543892122342b97e5137b266959ba97589b3ef.tar.gz
           spark-df543892122342b97e5137b266959ba97589b3ef.tar.bz2
           spark-df543892122342b97e5137b266959ba97589b3ef.zip
[SPARK-8922] [DOCUMENTATION, MLLIB] Add @since tags to mllib.evaluation
Author: shikai.tang <tar.sky06@gmail.com>

Closes #7429 from mosessky/master.
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala  32
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala             9
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala             4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala                4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala             6
5 files changed, 50 insertions, 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index c1d1a22481..486741edd6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -41,6 +41,7 @@ import org.apache.spark.sql.DataFrame
* of bins may not exactly equal numBins. The last bin in each partition may
* be smaller as a result, meaning there may be an extra sample at
* partition boundaries.
+ * @since 1.3.0
*/
@Experimental
class BinaryClassificationMetrics(
@@ -51,6 +52,7 @@ class BinaryClassificationMetrics(
/**
* Defaults `numBins` to 0.
+ * @since 1.0.0
*/
def this(scoreAndLabels: RDD[(Double, Double)]) = this(scoreAndLabels, 0)
@@ -61,12 +63,18 @@ class BinaryClassificationMetrics(
private[mllib] def this(scoreAndLabels: DataFrame) =
this(scoreAndLabels.map(r => (r.getDouble(0), r.getDouble(1))))
- /** Unpersist intermediate RDDs used in the computation. */
+ /**
+ * Unpersist intermediate RDDs used in the computation.
+ * @since 1.0.0
+ */
def unpersist() {
cumulativeCounts.unpersist()
}
- /** Returns thresholds in descending order. */
+ /**
+ * Returns thresholds in descending order.
+ * @since 1.0.0
+ */
def thresholds(): RDD[Double] = cumulativeCounts.map(_._1)
/**
@@ -74,6 +82,7 @@ class BinaryClassificationMetrics(
* which is an RDD of (false positive rate, true positive rate)
* with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
* @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
+ * @since 1.0.0
*/
def roc(): RDD[(Double, Double)] = {
val rocCurve = createCurve(FalsePositiveRate, Recall)
@@ -85,6 +94,7 @@ class BinaryClassificationMetrics(
/**
* Computes the area under the receiver operating characteristic (ROC) curve.
+ * @since 1.0.0
*/
def areaUnderROC(): Double = AreaUnderCurve.of(roc())
@@ -92,6 +102,7 @@ class BinaryClassificationMetrics(
* Returns the precision-recall curve, which is an RDD of (recall, precision),
* NOT (precision, recall), with (0.0, 1.0) prepended to it.
* @see http://en.wikipedia.org/wiki/Precision_and_recall
+ * @since 1.0.0
*/
def pr(): RDD[(Double, Double)] = {
val prCurve = createCurve(Recall, Precision)
@@ -102,6 +113,7 @@ class BinaryClassificationMetrics(
/**
* Computes the area under the precision-recall curve.
+ * @since 1.0.0
*/
def areaUnderPR(): Double = AreaUnderCurve.of(pr())
@@ -110,16 +122,26 @@ class BinaryClassificationMetrics(
* @param beta the beta factor in F-Measure computation.
* @return an RDD of (threshold, F-Measure) pairs.
* @see http://en.wikipedia.org/wiki/F1_score
+ * @since 1.0.0
*/
def fMeasureByThreshold(beta: Double): RDD[(Double, Double)] = createCurve(FMeasure(beta))
- /** Returns the (threshold, F-Measure) curve with beta = 1.0. */
+ /**
+ * Returns the (threshold, F-Measure) curve with beta = 1.0.
+ * @since 1.0.0
+ */
def fMeasureByThreshold(): RDD[(Double, Double)] = fMeasureByThreshold(1.0)
- /** Returns the (threshold, precision) curve. */
+ /**
+ * Returns the (threshold, precision) curve.
+ * @since 1.0.0
+ */
def precisionByThreshold(): RDD[(Double, Double)] = createCurve(Precision)
- /** Returns the (threshold, recall) curve. */
+ /**
+ * Returns the (threshold, recall) curve.
+ * @since 1.0.0
+ */
def recallByThreshold(): RDD[(Double, Double)] = createCurve(Recall)
private lazy val (
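For context, a minimal usage sketch of the BinaryClassificationMetrics API annotated above, assuming a spark-shell session where `sc` is the SparkContext; the (score, label) values are made up for illustration, and only the constructor and methods shown in these hunks are used.

import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics

// Assumed sample data: (score, label) pairs with labels in {0.0, 1.0}.
val scoreAndLabels = sc.parallelize(Seq(
  (0.9, 1.0), (0.8, 1.0), (0.6, 0.0), (0.4, 1.0), (0.2, 0.0)))

// Auxiliary constructor documented above; numBins defaults to 0 (no down-sampling).
val metrics = new BinaryClassificationMetrics(scoreAndLabels)

metrics.roc().collect()            // (FPR, TPR) points, (0.0, 0.0) prepended and (1.0, 1.0) appended
metrics.areaUnderROC()             // area under the ROC curve
metrics.pr().collect()             // (recall, precision) points, (0.0, 1.0) prepended
metrics.areaUnderPR()              // area under the precision-recall curve
metrics.fMeasureByThreshold(0.5)   // (threshold, F-measure) pairs with beta = 0.5
metrics.unpersist()                // release the cached intermediate RDDs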
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index 4628dc5690..dddfa3ea5b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -30,6 +30,7 @@ import org.apache.spark.sql.DataFrame
* Evaluator for multiclass classification.
*
* @param predictionAndLabels an RDD of (prediction, label) pairs.
+ * @since 1.1.0
*/
@Experimental
class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
@@ -64,6 +65,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
* predicted classes are in columns,
* they are ordered by class label ascending,
* as in "labels"
+ * @since 1.1.0
*/
def confusionMatrix: Matrix = {
val n = labels.size
@@ -83,12 +85,14 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns true positive rate for a given label (category)
* @param label the label.
+ * @since 1.1.0
*/
def truePositiveRate(label: Double): Double = recall(label)
/**
* Returns false positive rate for a given label (category)
* @param label the label.
+ * @since 1.1.0
*/
def falsePositiveRate(label: Double): Double = {
val fp = fpByClass.getOrElse(label, 0)
@@ -98,6 +102,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns precision for a given label (category)
* @param label the label.
+ * @since 1.1.0
*/
def precision(label: Double): Double = {
val tp = tpByClass(label)
@@ -108,6 +113,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns recall for a given label (category)
* @param label the label.
+ * @since 1.1.0
*/
def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label)
@@ -115,6 +121,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
* Returns f-measure for a given label (category)
* @param label the label.
* @param beta the beta parameter.
+ * @since 1.1.0
*/
def fMeasure(label: Double, beta: Double): Double = {
val p = precision(label)
@@ -126,6 +133,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns f1-measure for a given label (category)
* @param label the label.
+ * @since 1.1.0
*/
def fMeasure(label: Double): Double = fMeasure(label, 1.0)
@@ -179,6 +187,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns weighted averaged f-measure
* @param beta the beta parameter.
+ * @since 1.1.0
*/
def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category, count) =>
fMeasure(category, beta) * count.toDouble / labelCount
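Likewise, a hypothetical sketch for MulticlassMetrics (again assuming `sc` from a spark-shell session; the sample predictions and labels are invented). fMeasure(label, beta) follows the standard F-beta definition, (1 + beta^2) * p * r / (beta^2 * p + r), using the per-label precision p and recall r documented above.

import org.apache.spark.mllib.evaluation.MulticlassMetrics

// Assumed sample data: (prediction, label) pairs over the classes 0.0, 1.0 and 2.0.
val predictionAndLabels = sc.parallelize(Seq(
  (0.0, 0.0), (1.0, 1.0), (2.0, 2.0), (1.0, 0.0), (2.0, 1.0), (0.0, 0.0)))

val metrics = new MulticlassMetrics(predictionAndLabels)

metrics.confusionMatrix         // rows: actual classes, columns: predicted classes, labels ascending
metrics.precision(1.0)          // precision for class 1.0
metrics.recall(1.0)             // recall for class 1.0 (equal to truePositiveRate(1.0))
metrics.falsePositiveRate(1.0)  // false positive rate for class 1.0
metrics.fMeasure(1.0, 0.5)      // F-beta for class 1.0 with beta = 0.5
metrics.weightedFMeasure(0.5)   // per-class F-beta averaged with label-count weights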
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
index bf6eb1d5bd..77cb1e09bd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.DataFrame
* Evaluator for multilabel classification.
* @param predictionAndLabels an RDD of (predictions, labels) pairs,
* both are non-null Arrays, each with unique elements.
+ * @since 1.2.0
*/
class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
@@ -103,6 +104,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns precision for a given label (category)
* @param label the label.
+ * @since 1.2.0
*/
def precision(label: Double): Double = {
val tp = tpPerClass(label)
@@ -113,6 +115,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns recall for a given label (category)
* @param label the label.
+ * @since 1.2.0
*/
def recall(label: Double): Double = {
val tp = tpPerClass(label)
@@ -123,6 +126,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns f1-measure for a given label (category)
* @param label the label.
+ * @since 1.2.0
*/
def f1Measure(label: Double): Double = {
val p = precision(label)
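A corresponding hypothetical sketch for MultilabelMetrics (sample data and `sc` are assumptions); each record pairs the predicted label set with the true label set, both as arrays of distinct doubles, as the constructor documented above requires.

import org.apache.spark.mllib.evaluation.MultilabelMetrics

// Assumed sample data: (predicted labels, true labels) pairs.
val predictionAndLabels = sc.parallelize(Seq(
  (Array(0.0, 1.0), Array(0.0, 2.0)),
  (Array(0.0, 2.0), Array(0.0, 1.0)),
  (Array(2.0), Array(2.0))))

val metrics = new MultilabelMetrics(predictionAndLabels)

metrics.precision(0.0)  // precision for label 0.0
metrics.recall(0.0)     // recall for label 0.0
metrics.f1Measure(0.0)  // F1 for label 0.0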
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index 5b5a2a1450..063fbed8cd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -34,6 +34,7 @@ import org.apache.spark.rdd.RDD
* Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance.
*
* @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs.
+ * @since 1.2.0
*/
@Experimental
class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])])
@@ -55,6 +56,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
*
* @param k the position to compute the truncated precision, must be positive
* @return the average precision at the first k ranking positions
+ * @since 1.2.0
*/
def precisionAt(k: Int): Double = {
require(k > 0, "ranking position k should be positive")
@@ -124,6 +126,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
*
* @param k the position to compute the truncated ndcg, must be positive
* @return the average ndcg at the first k ranking positions
+ * @since 1.2.0
*/
def ndcgAt(k: Int): Double = {
require(k > 0, "ranking position k should be positive")
@@ -162,6 +165,7 @@ object RankingMetrics {
/**
* Creates a [[RankingMetrics]] instance (for Java users).
* @param predictionAndLabels a JavaRDD of (predicted ranking, ground truth set) pairs
+ * @since 1.4.0
*/
def of[E, T <: jl.Iterable[E]](predictionAndLabels: JavaRDD[(T, T)]): RankingMetrics[E] = {
implicit val tag = JavaSparkContext.fakeClassTag[E]
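And a hypothetical Scala-side sketch for RankingMetrics (the document ids and `sc` are invented); Java users would go through RankingMetrics.of, as in the hunk above, instead of the constructor.

import org.apache.spark.mllib.evaluation.RankingMetrics

// Assumed sample data: (predicted ranking, ground truth set) pairs of document ids.
val predictionAndLabels = sc.parallelize(Seq(
  (Array(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Array(1, 2, 3, 4, 5)),
  (Array(4, 1, 5, 6, 2, 7, 3, 8, 9, 10), Array(1, 2, 3))))

val metrics = new RankingMetrics(predictionAndLabels)

metrics.precisionAt(5)  // average precision over the first 5 ranking positions
metrics.ndcgAt(5)       // average NDCG over the first 5 ranking positions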
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 408847afa8..54dfd8c099 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.DataFrame
* Evaluator for regression.
*
* @param predictionAndObservations an RDD of (prediction, observation) pairs.
+ * @since 1.2.0
*/
@Experimental
class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
@@ -66,6 +67,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
* Returns the variance explained by regression.
* explainedVariance = \sum_i (\hat{y_i} - \bar{y})^2 / n
* @see [[https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained]]
+ * @since 1.2.0
*/
def explainedVariance: Double = {
SSreg / summary.count
@@ -74,6 +76,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns the mean absolute error, which is a risk function corresponding to the
* expected value of the absolute error loss or l1-norm loss.
+ * @since 1.2.0
*/
def meanAbsoluteError: Double = {
summary.normL1(1) / summary.count
@@ -82,6 +85,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns the mean squared error, which is a risk function corresponding to the
* expected value of the squared error loss or quadratic loss.
+ * @since 1.2.0
*/
def meanSquaredError: Double = {
SSerr / summary.count
@@ -90,6 +94,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns the root mean squared error, which is defined as the square root of
* the mean squared error.
+ * @since 1.2.0
*/
def rootMeanSquaredError: Double = {
math.sqrt(this.meanSquaredError)
@@ -98,6 +103,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns R^2^, the unadjusted coefficient of determination.
* @see [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
+ * @since 1.2.0
*/
def r2: Double = {
1 - SSerr / SStot
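Finally, a hypothetical sketch for RegressionMetrics (the (prediction, observation) values and `sc` are assumptions), exercising the accessors annotated in this file.

import org.apache.spark.mllib.evaluation.RegressionMetrics

// Assumed sample data: (prediction, observation) pairs.
val predictionAndObservations = sc.parallelize(Seq(
  (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)))

val metrics = new RegressionMetrics(predictionAndObservations)

metrics.explainedVariance    // \sum_i (\hat{y_i} - \bar{y})^2 / n, per the doc above
metrics.meanAbsoluteError    // expected absolute (L1) error
metrics.meanSquaredError     // expected squared (quadratic) error
metrics.rootMeanSquaredError // square root of the mean squared error
metrics.r2                   // 1 - SSerr / SStot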