commit    f5b028ed2f1ad6de43c8b50ebf480e1b6c047035 (patch)
tree      64f92dec3697098ef9c5c954f302cd1c796cedf1 /mllib
parent    d89cc38b33815e7b99fb3389b5038a543527065d (diff)
author    MechCoder <manojkumarsivaraj334@gmail.com>  2015-08-21 14:19:24 -0700
committer Xiangrui Meng <meng@databricks.com>  2015-08-21 14:19:24 -0700
[SPARK-9864] [DOC] [MLlib] [SQL] Replace @since scaladoc tags with the @Since annotation
Author: MechCoder <manojkumarsivaraj334@gmail.com>
Closes #8352 from MechCoder/since.
Diffstat (limited to 'mllib')
68 files changed, 692 insertions, 862 deletions
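The change is mechanical across all 68 files: each `@since x.y.z` scaladoc tag is deleted and the version is recorded instead with MLlib's `@Since("x.y.z")` annotation, with `org.apache.spark.annotation.Since` added to the imports. A minimal sketch of the before/after pattern follows; the `Since` class defined here is a local stand-in (so the snippet compiles without a Spark dependency), and `Example`/`trainOld`/`trainNew` are illustrative names, not code from the touched files:

import scala.annotation.StaticAnnotation

// Local stand-in for org.apache.spark.annotation.Since; the real annotation
// additionally carries the version into the generated API docs.
class Since(version: String) extends StaticAnnotation

object Example {
  /**
   * Old style: the availability version lives only in a scaladoc tag.
   * @since 0.8.0
   */
  def trainOld(data: Seq[Double]): Double = data.sum

  /**
   * New style: the tag is dropped from the comment and the version is
   * recorded by an annotation on the member instead.
   */
  @Since("0.8.0")
  def trainNew(data: Seq[Double]): Double = data.sum
}

One practical benefit of the annotation form is that it survives compilation, so tooling can read the version programmatically rather than parsing doc comments.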
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
index ba73024e3c..a29b425a71 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.classification
 
 import org.json4s.{DefaultFormats, JValue}
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.rdd.RDD
@@ -36,8 +36,8 @@ trait ClassificationModel extends Serializable {
    *
    * @param testData RDD representing data points to be predicted
    * @return an RDD[Double] where each entry contains the corresponding prediction
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: RDD[Vector]): RDD[Double]
@@ -45,16 +45,16 @@ trait ClassificationModel extends Serializable {
    *
    * @param testData array representing a single data point
    * @return predicted category from the trained model
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: Vector): Double
 
   /**
    * Predict values for examples stored in a JavaRDD.
    * @param testData JavaRDD representing data points to be predicted
    * @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
     predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
 }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 268642ac6a..e03e662227 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -18,7 +18,7 @@ package org.apache.spark.mllib.classification
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.classification.impl.GLMClassificationModel
 import org.apache.spark.mllib.linalg.BLAS.dot
 import org.apache.spark.mllib.linalg.{DenseVector, Vector}
@@ -85,8 +85,8 @@ class LogisticRegressionModel (
    * in Binary Logistic Regression. An example with prediction score greater than or equal to
    * this threshold is identified as an positive, and negative otherwise. The default value is 0.5.
    * It is only used for binary classification.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def setThreshold(threshold: Double): this.type = {
     this.threshold = Some(threshold)
@@ -97,8 +97,8 @@ class LogisticRegressionModel (
    * :: Experimental ::
    * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
    * It is only used for binary classification.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def getThreshold: Option[Double] = threshold
@@ -106,8 +106,8 @@ class LogisticRegressionModel (
    * :: Experimental ::
    * Clears the threshold so that `predict` will output raw prediction scores.
    * It is only used for binary classification.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def clearThreshold(): this.type = {
     threshold = None
@@ -158,9 +158,7 @@ class LogisticRegressionModel (
     }
   }
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
       numFeatures, numClasses, weights, intercept, threshold)
@@ -168,9 +166,7 @@ class LogisticRegressionModel (
   override protected def formatVersion: String = "1.0"
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
   }
@@ -178,9 +174,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): LogisticRegressionModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -261,8 +255,8 @@ object LogisticRegressionWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
      numIterations: Int,
@@ -284,8 +278,8 @@ object LogisticRegressionWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -306,8 +300,8 @@ object LogisticRegressionWithSGD {
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LogisticRegressionModel which has the weights and offset from training.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -324,8 +318,8 @@ object LogisticRegressionWithSGD {
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LogisticRegressionModel which has the weights and offset from training.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int): LogisticRegressionModel = {
@@ -361,8 +355,8 @@ class LogisticRegressionWithLBFGS
    * Set the number of possible outcomes for k classes classification problem in
    * Multinomial Logistic Regression.
    * By default, it is binary logistic regression so k will be set to 2.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def setNumClasses(numClasses: Int): this.type = {
     require(numClasses > 1)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 2df91c0942..dab369207c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -25,6 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.{Logging, SparkContext, SparkException}
+import org.apache.spark.annotation.Since
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.{Loader, Saveable}
@@ -444,8 +445,8 @@ object NaiveBayes {
    *
    * @param input RDD of `(label, array of features)` pairs.  Every vector should be a frequency
    *              vector or a count vector.
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint]): NaiveBayesModel = {
     new NaiveBayes().run(input)
   }
@@ -460,8 +461,8 @@ object NaiveBayes {
    * @param input RDD of `(label, array of features)` pairs.  Every vector should be a frequency
    *              vector or a count vector.
    * @param lambda The smoothing parameter
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint], lambda: Double): NaiveBayesModel = {
     new NaiveBayes(lambda, Multinomial).run(input)
   }
@@ -483,8 +484,8 @@ object NaiveBayes {
    *
    * @param modelType The type of NB model to fit from the enumeration NaiveBayesModels, can be
    *                  multinomial or bernoulli
-   * @since 0.9.0
    */
+  @Since("0.9.0")
   def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = {
     require(supportedModelTypes.contains(modelType),
       s"NaiveBayes was created with an unknown modelType: $modelType.")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 5b54feeb10..5f87269863 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -18,7 +18,7 @@ package org.apache.spark.mllib.classification
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.classification.impl.GLMClassificationModel
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.optimization._
@@ -46,8 +46,8 @@ class SVMModel (
    * Sets the threshold that separates positive predictions from negative predictions. An example
    * with prediction score greater than or equal to this threshold is identified as an positive,
    * and negative otherwise. The default value is 0.0.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def setThreshold(threshold: Double): this.type = {
     this.threshold = Some(threshold)
@@ -57,16 +57,16 @@ class SVMModel (
   /**
    * :: Experimental ::
    * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   def getThreshold: Option[Double] = threshold
   /**
    * :: Experimental ::
    * Clears the threshold so that `predict` will output raw prediction scores.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   @Experimental
   def clearThreshold(): this.type = {
     threshold = None
@@ -84,9 +84,7 @@ class SVMModel (
     }
   }
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
       numFeatures = weights.size, numClasses = 2, weights, intercept, threshold)
@@ -94,9 +92,7 @@ class SVMModel (
   override protected def formatVersion: String = "1.0"
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
   }
@@ -104,9 +100,7 @@ object SVMModel extends Loader[SVMModel] {
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def load(sc: SparkContext, path: String): SVMModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -185,8 +179,8 @@ object SVMWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -209,8 +203,8 @@ object SVMWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -231,8 +225,8 @@ object SVMWithSGD {
    * @param regParam Regularization parameter.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a SVMModel which has the weights and offset from training.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       input: RDD[LabeledPoint],
       numIterations: Int,
@@ -250,8 +244,8 @@ object SVMWithSGD {
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a SVMModel which has the weights and offset from training.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(input: RDD[LabeledPoint], numIterations: Int): SVMModel = {
     train(input, numIterations, 1.0, 0.01, 1.0)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
index bc27b1fe73..fcc9dfecac 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.IndexedSeq
 
 import breeze.linalg.{diag, DenseMatrix => BreezeMatrix, DenseVector => BDV, Vector => BV}
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, Matrices, Vector, Vectors}
 import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -62,8 +62,8 @@ class GaussianMixture private (
   /**
    * Constructs a default instance. The default parameters are {k: 2, convergenceTol: 0.01,
    * maxIterations: 100, seed: random}.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def this() = this(2, 0.01, 100, Utils.random.nextLong())
   // number of samples per cluster to use when initializing Gaussians
@@ -77,8 +77,8 @@ class GaussianMixture private (
    * Set the initial GMM starting point, bypassing the random initialization.
    * You must call setK() prior to calling this method, and the condition
    * (model.k == this.k) must be met; failure will result in an IllegalArgumentException
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setInitialModel(model: GaussianMixtureModel): this.type = {
     if (model.k == k) {
       initialModel = Some(model)
@@ -90,14 +90,14 @@ class GaussianMixture private (
   /**
    * Return the user supplied initial GMM, if supplied
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getInitialModel: Option[GaussianMixtureModel] = initialModel
   /**
    * Set the number of Gaussians in the mixture model.  Default: 2
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -105,14 +105,14 @@ class GaussianMixture private (
   /**
    * Return the number of Gaussians in the mixture model
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getK: Int = k
   /**
    * Set the maximum number of iterations to run. Default: 100
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -120,15 +120,15 @@ class GaussianMixture private (
   /**
    * Return the maximum number of iterations to run
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getMaxIterations: Int = maxIterations
   /**
    * Set the largest change in log-likelihood at which convergence is
    * considered to have occurred.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setConvergenceTol(convergenceTol: Double): this.type = {
     this.convergenceTol = convergenceTol
     this
@@ -137,14 +137,14 @@ class GaussianMixture private (
   /**
    * Return the largest change in log-likelihood at which convergence is
    * considered to have occurred.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getConvergenceTol: Double = convergenceTol
   /**
    * Set the random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
@@ -152,14 +152,14 @@ class GaussianMixture private (
   /**
    * Return the random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getSeed: Long = seed
   /**
    * Perform expectation maximization
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(data: RDD[Vector]): GaussianMixtureModel = {
     val sc = data.sparkContext
@@ -235,8 +235,8 @@ class GaussianMixture private (
   /**
    * Java-friendly version of [[run()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(data: JavaRDD[Vector]): GaussianMixtureModel = run(data.rdd)
   private def updateWeightsAndGaussians(
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 2fa0473737..1a10a8b624 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -24,7 +24,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.{Vector, Matrices, Matrix}
 import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -43,8 +43,8 @@ import org.apache.spark.sql.{SQLContext, Row}
  *                the weight for Gaussian i, and weights.sum == 1
  * @param gaussians Array of MultivariateGaussian where gaussians(i) represents
  *                  the Multivariate Gaussian (Normal) Distribution for Gaussian i
- * @since 1.3.0
 */
+@Since("1.3.0")
 @Experimental
 class GaussianMixtureModel(
     val weights: Array[Double],
@@ -54,23 +54,21 @@ class GaussianMixtureModel(
   override protected def formatVersion = "1.0"
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GaussianMixtureModel.SaveLoadV1_0.save(sc, path, weights, gaussians)
   }
   /**
    * Number of gaussians in mixture
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def k: Int = weights.length
   /**
    * Maps given points to their cluster indices.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def predict(points: RDD[Vector]): RDD[Int] = {
     val responsibilityMatrix = predictSoft(points)
     responsibilityMatrix.map(r => r.indexOf(r.max))
@@ -78,8 +76,8 @@ class GaussianMixtureModel(
   /**
    * Maps given point to its cluster index.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def predict(point: Vector): Int = {
     val r = computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
     r.indexOf(r.max)
@@ -87,16 +85,16 @@ class GaussianMixtureModel(
   /**
    * Java-friendly version of [[predict()]]
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
     predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]
   /**
    * Given the input vectors, return the membership value of each vector
    * to all mixture components.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def predictSoft(points: RDD[Vector]): RDD[Array[Double]] = {
     val sc = points.sparkContext
     val bcDists = sc.broadcast(gaussians)
@@ -108,8 +106,8 @@ class GaussianMixtureModel(
   /**
    * Given the input vector, return the membership values to all mixture components.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def predictSoft(point: Vector): Array[Double] = {
     computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
   }
@@ -133,9 +131,7 @@ class GaussianMixtureModel(
   }
 }
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
 @Experimental
 object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
@@ -186,9 +182,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
     }
   }
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def load(sc: SparkContext, path: String) : GaussianMixtureModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     implicit val formats = DefaultFormats
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 9ef6834e5e..3e9545a74b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.linalg.BLAS.{axpy, scal}
 import org.apache.spark.mllib.util.MLUtils
@@ -49,20 +49,20 @@ class KMeans private (
   /**
    * Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
    * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4, seed: random}.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
   /**
    * Number of clusters to create (k).
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getK: Int = k
   /**
    * Set the number of clusters to create (k). Default: 2.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -70,14 +70,14 @@ class KMeans private (
   /**
    * Maximum number of iterations to run.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getMaxIterations: Int = maxIterations
   /**
    * Set maximum number of iterations to run. Default: 20.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -85,16 +85,16 @@ class KMeans private (
   /**
    * The initialization algorithm. This can be either "random" or "k-means||".
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getInitializationMode: String = initializationMode
   /**
    * Set the initialization algorithm. This can be either "random" to choose random points as
    * initial cluster centers, or "k-means||" to use a parallel variant of k-means++
    * (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setInitializationMode(initializationMode: String): this.type = {
     KMeans.validateInitMode(initializationMode)
     this.initializationMode = initializationMode
@@ -104,8 +104,8 @@ class KMeans private (
   /**
    * :: Experimental ::
    * Number of runs of the algorithm to execute in parallel.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   @Experimental
   def getRuns: Int = runs
@@ -114,8 +114,8 @@ class KMeans private (
    * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
    * this many times with random starting conditions (configured by the initialization mode), then
    * return the best clustering found over any run. Default: 1.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   @Experimental
   def setRuns(runs: Int): this.type = {
     if (runs <= 0) {
@@ -127,15 +127,15 @@ class KMeans private (
   /**
    * Number of steps for the k-means|| initialization mode
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getInitializationSteps: Int = initializationSteps
   /**
    * Set the number of steps for the k-means|| initialization mode. This is an advanced
    * setting -- the default of 5 is almost always enough. Default: 5.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setInitializationSteps(initializationSteps: Int): this.type = {
     if (initializationSteps <= 0) {
       throw new IllegalArgumentException("Number of initialization steps must be positive")
@@ -146,15 +146,15 @@ class KMeans private (
   /**
    * The distance threshold within which we've consider centers to have converged.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getEpsilon: Double = epsilon
   /**
    * Set the distance threshold within which we've consider centers to have converged.
    * If all centers move less than this Euclidean distance, we stop iterating one run.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def setEpsilon(epsilon: Double): this.type = {
     this.epsilon = epsilon
     this
@@ -162,14 +162,14 @@ class KMeans private (
   /**
    * The random seed for cluster initialization.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getSeed: Long = seed
   /**
    * Set the random seed for cluster initialization.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
@@ -183,8 +183,8 @@ class KMeans private (
    * Set the initial starting point, bypassing the random initialization or k-means||
    * The condition model.k == this.k must be met, failure results
    * in an IllegalArgumentException.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setInitialModel(model: KMeansModel): this.type = {
     require(model.k == k, "mismatched cluster count")
     initialModel = Some(model)
@@ -194,8 +194,8 @@ class KMeans private (
   /**
    * Train a K-means model on the given set of points; `data` should be cached for high
    * performance, because this is an iterative algorithm.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def run(data: RDD[Vector]): KMeansModel = {
     if (data.getStorageLevel == StorageLevel.NONE) {
@@ -453,14 +453,14 @@ class KMeans private (
 /**
  * Top-level methods for calling K-means clustering.
- * @since 0.8.0
 */
+@Since("0.8.0")
 object KMeans {
   // Initialization mode names
-  /** @since 0.8.0 */
+  @Since("0.8.0")
   val RANDOM = "random"
-  /** @since 0.8.0 */
+  @Since("0.8.0")
   val K_MEANS_PARALLEL = "k-means||"
   /**
@@ -472,8 +472,8 @@ object KMeans {
    * @param runs number of parallel runs, defaults to 1. The best model is returned.
    * @param initializationMode initialization model, either "random" or "k-means||" (default).
    * @param seed random seed value for cluster initialization
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def train(
       data: RDD[Vector],
       k: Int,
@@ -497,8 +497,8 @@ object KMeans {
    * @param maxIterations max number of iterations
    * @param runs number of parallel runs, defaults to 1. The best model is returned.
    * @param initializationMode initialization model, either "random" or "k-means||" (default).
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       data: RDD[Vector],
       k: Int,
@@ -514,8 +514,8 @@ object KMeans {
   /**
    * Trains a k-means model using specified parameters and the default values for unspecified.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       data: RDD[Vector],
       k: Int,
@@ -525,8 +525,8 @@ object KMeans {
   /**
    * Trains a k-means model using specified parameters and the default values for unspecified.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def train(
       data: RDD[Vector],
       k: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index 8de2087ceb..e425ecdd48 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -23,6 +23,7 @@ import org.json4s._
 import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.pmml.PMMLExportable
@@ -34,35 +35,35 @@ import org.apache.spark.sql.Row
 /**
  * A clustering model for K-means. Each point belongs to the cluster with the closest center.
- * @since 0.8.0
 */
+@Since("0.8.0")
 class KMeansModel (
     val clusterCenters: Array[Vector]) extends Saveable with Serializable with PMMLExportable {
   /**
    * A Java-friendly constructor that takes an Iterable of Vectors.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def this(centers: java.lang.Iterable[Vector]) = this(centers.asScala.toArray)
   /**
    * Total number of clusters.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def k: Int = clusterCenters.length
   /**
    * Returns the cluster index that a given point belongs to.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def predict(point: Vector): Int = {
     KMeans.findClosest(clusterCentersWithNorm, new VectorWithNorm(point))._1
   }
   /**
    * Maps given points to their cluster indices.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def predict(points: RDD[Vector]): RDD[Int] = {
     val centersWithNorm = clusterCentersWithNorm
     val bcCentersWithNorm = points.context.broadcast(centersWithNorm)
@@ -71,16 +72,16 @@ class KMeansModel (
   /**
    * Maps given points to their cluster indices.
-   * @since 1.0.0
    */
+  @Since("1.0.0")
   def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
     predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]
   /**
    * Return the K-means cost (sum of squared distances of points to their nearest center) for this
    * model on the given data.
-   * @since 0.8.0
    */
+  @Since("0.8.0")
   def computeCost(data: RDD[Vector]): Double = {
     val centersWithNorm = clusterCentersWithNorm
     val bcCentersWithNorm = data.context.broadcast(centersWithNorm)
@@ -90,9 +91,7 @@ class KMeansModel (
   private def clusterCentersWithNorm: Iterable[VectorWithNorm] =
     clusterCenters.map(new VectorWithNorm(_))
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
     KMeansModel.SaveLoadV1_0.save(sc, this, path)
   }
@@ -100,14 +99,10 @@ class KMeansModel (
   override protected def formatVersion: String = "1.0"
 }
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
 object KMeansModel extends Loader[KMeansModel] {
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def load(sc: SparkContext, path: String): KMeansModel = {
     KMeansModel.SaveLoadV1_0.load(sc, path)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
index 2a8c6acbae..92a321afb0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
 import breeze.linalg.{DenseVector => BDV}
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
 import org.apache.spark.api.java.JavaPairRDD
 import org.apache.spark.graphx._
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
@@ -43,8 +43,8 @@ import org.apache.spark.util.Utils
  *
 * @see [[http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation Latent Dirichlet allocation
 *       (Wikipedia)]]
- * @since 1.3.0
 */
+@Since("1.3.0")
 @Experimental
 class LDA private (
     private var k: Int,
@@ -57,8 +57,8 @@ class LDA private (
   /**
    * Constructs a LDA instance with default parameters.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def this() = this(k = 10, maxIterations = 20, docConcentration = Vectors.dense(-1),
     topicConcentration = -1, seed = Utils.random.nextLong(), checkpointInterval = 10,
     ldaOptimizer = new EMLDAOptimizer)
@@ -66,15 +66,15 @@ class LDA private (
   /**
    * Number of topics to infer.  I.e., the number of soft cluster centers.
    *
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getK: Int = k
   /**
    * Number of topics to infer.  I.e., the number of soft cluster centers.
    * (default = 10)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setK(k: Int): this.type = {
     require(k > 0, s"LDA k (number of clusters) must be > 0, but was set to $k")
     this.k = k
@@ -86,8 +86,8 @@ class LDA private (
    * distributions over topics ("theta").
    *
    * This is the parameter to a Dirichlet distribution.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def getAsymmetricDocConcentration: Vector = this.docConcentration
   /**
    * Concentration parameter (commonly named "alpha") for the prior placed on documents'
    * distributions over topics ("theta").
    *
    * This method assumes the Dirichlet distribution is symmetric and can be described by a single
    * [[Double]] parameter. It should fail if docConcentration is asymmetric.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getDocConcentration: Double = {
     val parameter = docConcentration(0)
     if (docConcentration.size == 1) {
@@ -131,8 +131,8 @@ class LDA private (
    *     - Values should be >= 0
    *     - default = uniformly (1.0 / k), following the implementation from
    *       [[https://github.com/Blei-Lab/onlineldavb]].
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def setDocConcentration(docConcentration: Vector): this.type = {
     require(docConcentration.size > 0, "docConcentration must have > 0 elements")
     this.docConcentration = docConcentration
     this
@@ -141,8 +141,8 @@ class LDA private (
   /**
    * Replicates a [[Double]] docConcentration to create a symmetric prior.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setDocConcentration(docConcentration: Double): this.type = {
     this.docConcentration = Vectors.dense(docConcentration)
     this
@@ -150,26 +150,26 @@ class LDA private (
   /**
    * Alias for [[getAsymmetricDocConcentration]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def getAsymmetricAlpha: Vector = getAsymmetricDocConcentration
   /**
    * Alias for [[getDocConcentration]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getAlpha: Double = getDocConcentration
   /**
    * Alias for [[setDocConcentration()]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def setAlpha(alpha: Vector): this.type = setDocConcentration(alpha)
   /**
    * Alias for [[setDocConcentration()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setAlpha(alpha: Double): this.type = setDocConcentration(alpha)
   /**
@@ -180,8 +180,8 @@ class LDA private (
    *
    * Note: The topics' distributions over terms are called "beta" in the original LDA paper
    * by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getTopicConcentration: Double = this.topicConcentration
   /**
@@ -205,8 +205,8 @@ class LDA private (
    *  - Value should be >= 0
    *  - default = (1.0 / k), following the implementation from
    *    [[https://github.com/Blei-Lab/onlineldavb]].
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setTopicConcentration(topicConcentration: Double): this.type = {
     this.topicConcentration = topicConcentration
     this
@@ -214,27 +214,27 @@ class LDA private (
   /**
    * Alias for [[getTopicConcentration]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getBeta: Double = getTopicConcentration
   /**
    * Alias for [[setTopicConcentration()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setBeta(beta: Double): this.type = setTopicConcentration(beta)
   /**
    * Maximum number of iterations for learning.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getMaxIterations: Int = maxIterations
   /**
    * Maximum number of iterations for learning.
    * (default = 20)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -242,14 +242,14 @@ class LDA private (
   /**
    * Random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getSeed: Long = seed
   /**
    * Random seed
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
@@ -257,8 +257,8 @@ class LDA private (
   /**
    * Period (in iterations) between checkpoints.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def getCheckpointInterval: Int = checkpointInterval
   /**
@@ -268,8 +268,8 @@ class LDA private (
    * [[org.apache.spark.SparkContext]], this setting is ignored.
    *
    * @see [[org.apache.spark.SparkContext#setCheckpointDir]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setCheckpointInterval(checkpointInterval: Int): this.type = {
     this.checkpointInterval = checkpointInterval
     this
@@ -280,8 +280,8 @@ class LDA private (
    * :: DeveloperApi ::
    *
    * LDAOptimizer used to perform the actual calculation
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   @DeveloperApi
   def getOptimizer: LDAOptimizer = ldaOptimizer
@@ -289,8 +289,8 @@ class LDA private (
    * :: DeveloperApi ::
    *
    * LDAOptimizer used to perform the actual calculation (default = EMLDAOptimizer)
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   @DeveloperApi
   def setOptimizer(optimizer: LDAOptimizer): this.type = {
     this.ldaOptimizer = optimizer
@@ -300,8 +300,8 @@ class LDA private (
   /**
    * Set the LDAOptimizer used to perform the actual calculation by algorithm name.
    * Currently "em", "online" are supported.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setOptimizer(optimizerName: String): this.type = {
     this.ldaOptimizer =
       optimizerName.toLowerCase match {
@@ -321,8 +321,8 @@ class LDA private (
    *                  (where the vocabulary size is the length of the vector).
    *                  Document IDs must be unique and >= 0.
    * @return  Inferred LDA model
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(documents: RDD[(Long, Vector)]): LDAModel = {
     val state = ldaOptimizer.initialize(documents, this)
     var iter = 0
@@ -339,8 +339,8 @@ class LDA private (
   /**
    * Java-friendly version of [[run()]]
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(documents: JavaPairRDD[java.lang.Long, Vector]): LDAModel = {
     run(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 6bc68a4c18..667374a2bc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
 import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
 import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -192,24 +192,16 @@ class LocalLDAModel private[clustering] (
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel with Serializable {
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def k: Int = topics.numCols
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def vocabSize: Int = topics.numRows
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def topicsMatrix: Matrix = topics
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])] = {
     val brzTopics = topics.toBreeze.toDenseMatrix
     Range(0, k).map { topicIndex =>
@@ -222,9 +214,7 @@ class LocalLDAModel private[clustering] (
   override protected def formatVersion = "1.0"
-  /**
-   * @since 1.5.0
-   */
+  @Since("1.5.0")
   override def save(sc: SparkContext, path: String): Unit = {
     LocalLDAModel.SaveLoadV1_0.save(sc, path, topicsMatrix, docConcentration, topicConcentration,
       gammaShape)
@@ -238,16 +228,16 @@ class LocalLDAModel private[clustering] (
    *
    * @param documents test corpus to use for calculating log likelihood
    * @return variational lower bound on the log likelihood of the entire corpus
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def logLikelihood(documents: RDD[(Long, Vector)]): Double = logLikelihoodBound(documents,
     docConcentration, topicConcentration, topicsMatrix.toBreeze.toDenseMatrix, gammaShape, k,
     vocabSize)
   /**
    * Java-friendly version of [[logLikelihood]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def logLikelihood(documents: JavaPairRDD[java.lang.Long, Vector]): Double = {
     logLikelihood(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
   }
@@ -258,8 +248,8 @@ class LocalLDAModel private[clustering] (
    *
    * @param documents test corpus to use for calculating perplexity
    * @return Variational upper bound on log perplexity per token.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def logPerplexity(documents: RDD[(Long, Vector)]): Double = {
     val corpusTokenCount = documents
       .map { case (_, termCounts) => termCounts.toArray.sum }
@@ -267,9 +257,8 @@ class LocalLDAModel private[clustering] (
     -logLikelihood(documents) / corpusTokenCount
   }
-  /** Java-friendly version of [[logPerplexity]]
-   * @since 1.5.0
-   */
+  /** Java-friendly version of [[logPerplexity]] */
+  @Since("1.5.0")
   def logPerplexity(documents: JavaPairRDD[java.lang.Long, Vector]): Double = {
     logPerplexity(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
   }
@@ -347,8 +336,8 @@ class LocalLDAModel private[clustering] (
    *         for each document.
    * @param documents documents to predict topic mixture distributions for
    * @return An RDD of (document ID, topic mixture distribution for document)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   // TODO: declare in LDAModel and override once implemented in DistributedLDAModel
   def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = {
     // Double transpose because dirichletExpectation normalizes by row and we need to normalize
@@ -376,8 +365,8 @@ class LocalLDAModel private[clustering] (
   /**
    * Java-friendly version of [[topicDistributions]]
-   * @since 1.4.1
    */
+  @Since("1.4.1")
   def topicDistributions(
       documents: JavaPairRDD[java.lang.Long, Vector]): JavaPairRDD[java.lang.Long, Vector] = {
     val distributions = topicDistributions(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
@@ -451,9 +440,7 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
     }
   }
-  /**
-   * @since 1.5.0
-   */
+  @Since("1.5.0")
   override def load(sc: SparkContext, path: String): LocalLDAModel = {
     val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path)
     implicit val formats = DefaultFormats
@@ -510,8 +497,8 @@ class DistributedLDAModel private[clustering] (
    * Convert model to a local model.
    * The local model stores the inferred topics but not the topic distributions for training
    * documents.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def toLocal: LocalLDAModel = new LocalLDAModel(topicsMatrix, docConcentration,
     topicConcentration, gammaShape)
@@ -521,8 +508,8 @@ class DistributedLDAModel private[clustering] (
    * No guarantees are given about the ordering of the topics.
    *
    * WARNING: This matrix is collected from an RDD. Beware memory usage when vocabSize, k are large.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   override lazy val topicsMatrix: Matrix = {
     // Collect row-major topics
     val termTopicCounts: Array[(Int, TopicCounts)] =
@@ -541,9 +528,7 @@ class DistributedLDAModel private[clustering] (
     Matrices.fromBreeze(brzTopics)
   }
-  /**
-   * @since 1.3.0
-   */
+  @Since("1.3.0")
   override def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])] = {
     val numTopics = k
     // Note: N_k is not needed to find the top terms, but it is needed to normalize weights
@@ -582,8 +567,8 @@ class DistributedLDAModel private[clustering] (
    * @return  Array over topics.  Each element represent as a pair of matching arrays:
    *          (IDs for the documents, weights of the topic in these documents).
    *          For each topic, documents are sorted in order of decreasing topic weights.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def topDocumentsPerTopic(maxDocumentsPerTopic: Int): Array[(Array[Long], Array[Double])] = {
     val numTopics = k
     val topicsInQueues: Array[BoundedPriorityQueue[(Double, Long)]] =
@@ -666,8 +651,8 @@ class DistributedLDAModel private[clustering] (
    *  - This excludes the prior; for that, use [[logPrior]].
    *  - Even with [[logPrior]], this is NOT the same as the data log likelihood given the
    *    hyperparameters.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   lazy val logLikelihood: Double = {
     // TODO: generalize this for asymmetric (non-scalar) alpha
     val alpha = this.docConcentration(0) // To avoid closure capture of enclosing object
@@ -693,8 +678,8 @@ class DistributedLDAModel private[clustering] (
   /**
    * Log probability of the current parameter estimate:
    * log P(topics, topic distributions for docs | alpha, eta)
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   lazy val logPrior: Double = {
     // TODO: generalize this for asymmetric (non-scalar) alpha
     val alpha = this.docConcentration(0) // To avoid closure capture of enclosing object
@@ -725,8 +710,8 @@ class DistributedLDAModel private[clustering] (
    * ("theta_doc").
    *
    * @return  RDD of (document ID, topic distribution) pairs
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def topicDistributions: RDD[(Long, Vector)] = {
     graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
       (docID.toLong, Vectors.fromBreeze(normalize(topicCounts, 1.0)))
@@ -735,8 +720,8 @@ class DistributedLDAModel private[clustering] (
   /**
    * Java-friendly version of [[topicDistributions]]
-   * @since 1.4.1
    */
+  @Since("1.4.1")
   def javaTopicDistributions: JavaPairRDD[java.lang.Long, Vector] = {
     JavaPairRDD.fromRDD(topicDistributions.asInstanceOf[RDD[(java.lang.Long, Vector)]])
   }
@@ -744,8 +729,8 @@ class DistributedLDAModel private[clustering] (
   /**
    * For each document, return the top k weighted topics for that document and their weights.
    * @return RDD of (doc ID, topic indices, topic weights)
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def topTopicsPerDocument(k: Int): RDD[(Long, Array[Int], Array[Double])] = {
     graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
       val topIndices = argtopk(topicCounts, k)
@@ -761,8 +746,8 @@ class DistributedLDAModel private[clustering] (
   /**
    * Java-friendly version of [[topTopicsPerDocument]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def javaTopTopicsPerDocument(k: Int): JavaRDD[(java.lang.Long, Array[Int], Array[Double])] = {
     val topics = topTopicsPerDocument(k)
     topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Double])]].toJavaRDD()
@@ -775,8 +760,8 @@ class DistributedLDAModel private[clustering] (
   /**
    * Java-friendly version of [[topicDistributions]]
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   override def save(sc: SparkContext, path: String): Unit = {
     DistributedLDAModel.SaveLoadV1_0.save(
       sc, path, graph, globalTopicTotals, k, vocabSize, docConcentration, topicConcentration,
@@ -877,9 +862,7 @@ object DistributedLDAModel extends Loader[DistributedLDAModel] {
   }
-  /**
-   * @since 1.5.0
-   */
+  @Since("1.5.0")
   override def load(sc: SparkContext, path: String): DistributedLDAModel = {
     val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path)
     implicit val formats = DefaultFormats
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index cb517f9689..5c2aae6403 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -23,7 +23,7 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, all, normalize, su
 import breeze.numerics.{trigamma, abs, exp}
 import breeze.stats.distributions.{Gamma, RandBasis}
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.graphx._
 import org.apache.spark.graphx.impl.GraphImpl
 import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer
@@ -35,8 +35,8 @@ import org.apache.spark.rdd.RDD
 *
 * An LDAOptimizer specifies which optimization/learning/inference algorithm to use, and it can
 * hold optimizer-specific parameters for users to set.
- * @since 1.4.0
 */
+@Since("1.4.0")
 @DeveloperApi
 sealed trait LDAOptimizer {
@@ -74,8 +74,8 @@ sealed trait LDAOptimizer {
 *  - Paper which clearly explains several algorithms, including EM:
 *    Asuncion, Welling, Smyth, and Teh.
 *    "On Smoothing and Inference for Topic Models."  UAI, 2009.
- * @since 1.4.0
 */
+@Since("1.4.0")
 @DeveloperApi
 final class EMLDAOptimizer extends LDAOptimizer {
@@ -226,8 +226,8 @@ final class EMLDAOptimizer extends LDAOptimizer {
 *
 * Original Online LDA paper:
 *   Hoffman, Blei and Bach, "Online Learning for Latent Dirichlet Allocation." NIPS, 2010.
- * @since 1.4.0
 */
+@Since("1.4.0")
 @DeveloperApi
 final class OnlineLDAOptimizer extends LDAOptimizer {
@@ -276,16 +276,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
   /**
    * A (positive) learning parameter that downweights early iterations. Larger values make early
    * iterations count less.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getTau0: Double = this.tau0
   /**
    * A (positive) learning parameter that downweights early iterations. Larger values make early
    * iterations count less.
    * Default: 1024, following the original Online LDA paper.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setTau0(tau0: Double): this.type = {
     require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0")
     this.tau0 = tau0
@@ -294,16 +294,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
   /**
    * Learning rate: exponential decay rate
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getKappa: Double = this.kappa
   /**
    * Learning rate: exponential decay rate---should be between
    * (0.5, 1.0] to guarantee asymptotic convergence.
    * Default: 0.51, based on the original Online LDA paper.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setKappa(kappa: Double): this.type = {
     require(kappa >= 0, s"Online LDA kappa must be nonnegative, but was set to $kappa")
     this.kappa = kappa
@@ -312,8 +312,8 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
   /**
    * Mini-batch fraction, which sets the fraction of document sampled and used in each iteration
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def getMiniBatchFraction: Double = this.miniBatchFraction
   /**
@@ -325,8 +325,8 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
    * maxIterations * miniBatchFraction >= 1.
    *
    * Default: 0.05, i.e., 5% of total documents.
-   * @since 1.4.0
    */
+  @Since("1.4.0")
   def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
     require(miniBatchFraction > 0.0 && miniBatchFraction <= 1.0,
       s"Online LDA miniBatchFraction must be in range (0,1], but was set to $miniBatchFraction")
@@ -337,16 +337,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
   /**
    * Optimize alpha, indicates whether alpha (Dirichlet parameter for document-topic distribution)
    * will be optimized during training.
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def getOptimzeAlpha: Boolean = this.optimizeAlpha
   /**
    * Sets whether to optimize alpha parameter during training.
    *
    * Default: false
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def setOptimzeAlpha(optimizeAlpha: Boolean): this.type = {
     this.optimizeAlpha = optimizeAlpha
     this
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
index b4733ca975..396b36f2f6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -21,7 +21,7 @@ import org.json4s.JsonDSL._
 import org.json4s._
 import org.json4s.jackson.JsonMethods._
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.graphx._
 import org.apache.spark.graphx.impl.GraphImpl
@@ -39,16 +39,14 @@ import org.apache.spark.{Logging, SparkContext, SparkException}
 *
 * @param k number of clusters
 * @param assignments an RDD of clustering [[PowerIterationClustering#Assignment]]s
- * @since 1.3.0
 */
+@Since("1.3.0")
 @Experimental
 class PowerIterationClusteringModel(
     val k: Int,
     val assignments: RDD[PowerIterationClustering.Assignment]) extends Saveable with Serializable {
-  /**
-   * @since 1.4.0
-   */
+  @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
     PowerIterationClusteringModel.SaveLoadV1_0.save(sc, this, path)
   }
@@ -56,9 +54,7 @@ class PowerIterationClusteringModel(
   override protected def formatVersion: String = "1.0"
 }
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
 object PowerIterationClusteringModel extends Loader[PowerIterationClusteringModel] {
   override def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
     PowerIterationClusteringModel.SaveLoadV1_0.load(sc, path)
@@ -73,8 +69,8 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
     val thisClassName = "org.apache.spark.mllib.clustering.PowerIterationClusteringModel"
     /**
-     * @since 1.4.0
     */
+    @Since("1.4.0")
    def save(sc: SparkContext, model: PowerIterationClusteringModel, path: String): Unit = {
       val sqlContext = new SQLContext(sc)
       import sqlContext.implicits._
@@ -87,9 +83,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
       dataRDD.write.parquet(Loader.dataPath(path))
     }
-    /**
-     * @since 1.4.0
-     */
+    @Since("1.4.0")
     def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
       implicit val formats = DefaultFormats
       val sqlContext = new SQLContext(sc)
@@ -136,14 +130,14 @@ class PowerIterationClustering private[clustering] (
   /**
    * Constructs a PIC instance with default parameters: {k: 2, maxIterations: 100,
    * initMode: "random"}.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def this() = this(k = 2, maxIterations = 100, initMode = "random")
   /**
    * Set the number of clusters.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -151,8 +145,8 @@ class PowerIterationClustering private[clustering] (
   /**
    * Set maximum number of iterations of the power iteration loop
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
@@ -161,8 +155,8 @@ class PowerIterationClustering private[clustering] (
   /**
    * Set the initialization mode. This can be either "random" to use a random vector
    * as vertex properties, or "degree" to use normalized sum similarities. Default: random.
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def setInitializationMode(mode: String): this.type = {
     this.initMode = mode match {
       case "random" | "degree" => mode
@@ -182,8 +176,8 @@ class PowerIterationClustering private[clustering] (
    *                     assume s,,ij,, = 0.0.
    *
    * @return a [[PowerIterationClusteringModel]] that contains the clustering result
-   * @since 1.5.0
    */
+  @Since("1.5.0")
   def run(graph: Graph[Double, Double]): PowerIterationClusteringModel = {
     val w = normalize(graph)
     val w0 = initMode match {
@@ -204,8 +198,8 @@ class PowerIterationClustering private[clustering] (
    *                     assume s,,ij,, = 0.0.
    *
    * @return a [[PowerIterationClusteringModel]] that contains the clustering result
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(similarities: RDD[(Long, Long, Double)]): PowerIterationClusteringModel = {
     val w = normalize(similarities)
     val w0 = initMode match {
@@ -217,8 +211,8 @@ class PowerIterationClustering private[clustering] (
   /**
    * A Java-friendly version of [[PowerIterationClustering.run]].
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   def run(similarities: JavaRDD[(java.lang.Long, java.lang.Long, java.lang.Double)])
     : PowerIterationClusteringModel = {
     run(similarities.rdd.asInstanceOf[RDD[(Long, Long, Double)]])
@@ -242,9 +236,7 @@ class PowerIterationClustering private[clustering] (
   }
 }
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
 @Experimental
 object PowerIterationClustering extends Logging {
@@ -253,8 +245,8 @@ object PowerIterationClustering extends Logging {
   /**
    * Cluster assignment.
    * @param id node id
    * @param cluster assigned cluster id
-   * @since 1.3.0
    */
+  @Since("1.3.0")
   @Experimental
   case class Assignment(id: Long, cluster: Int)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
index a915804b02..41f2668ec6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
 import scala.reflect.ClassTag
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.api.java.JavaSparkContext._
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
 import org.apache.spark.rdd.RDD
@@ -63,9 +63,8 @@ import org.apache.spark.util.random.XORShiftRandom
 *                  such that at time t + h the discount applied to the data from t is 0.5.
 *                  The definition remains the same whether the time unit is given
 *                  as batches or points.
- * @since 1.2.0
- *
 */
+@Since("1.2.0")
 @Experimental
 class StreamingKMeansModel(
     override val clusterCenters: Array[Vector],
@@ -73,8 +72,8 @@ class StreamingKMeansModel(
   /**
    * Perform a k-means update on a batch of data.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def update(data: RDD[Vector], decayFactor: Double, timeUnit: String): StreamingKMeansModel = {
     // find nearest cluster to each point
@@ -166,23 +165,23 @@ class StreamingKMeansModel(
 *   .setRandomCenters(5, 100.0)
 *   .trainOn(DStream)
 * }}}
- * @since 1.2.0
 */
+@Since("1.2.0")
 @Experimental
 class StreamingKMeans(
     var k: Int,
     var decayFactor: Double,
     var timeUnit: String) extends Logging with Serializable {
-  /** @since 1.2.0 */
+  @Since("1.2.0")
   def this() = this(2, 1.0, StreamingKMeans.BATCHES)
   protected var model: StreamingKMeansModel = new StreamingKMeansModel(null, null)
   /**
    * Set the number of clusters.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setK(k: Int): this.type = {
     this.k = k
     this
@@ -190,8 +189,8 @@ class StreamingKMeans(
   /**
    * Set the decay factor directly (for forgetful algorithms).
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setDecayFactor(a: Double): this.type = {
     this.decayFactor = a
     this
@@ -199,8 +198,8 @@ class StreamingKMeans(
   /**
    * Set the half life and time unit ("batches" or "points") for forgetful algorithms.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setHalfLife(halfLife: Double, timeUnit: String): this.type = {
     if (timeUnit != StreamingKMeans.BATCHES && timeUnit != StreamingKMeans.POINTS) {
       throw new IllegalArgumentException("Invalid time unit for decay: " + timeUnit)
@@ -213,8 +212,8 @@ class StreamingKMeans(
   /**
    * Specify initial centers directly.
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setInitialCenters(centers: Array[Vector], weights: Array[Double]): this.type = {
     model = new StreamingKMeansModel(centers, weights)
     this
@@ -226,8 +225,8 @@ class StreamingKMeans(
    * @param dim Number of dimensions
    * @param weight Weight for each center
    * @param seed Random seed
-   * @since 1.2.0
    */
+  @Since("1.2.0")
   def setRandomCenters(dim: Int, weight: Double, seed: Long = Utils.random.nextLong): this.type = {
     val random = new XORShiftRandom(seed)
     val centers = Array.fill(k)(Vectors.dense(Array.fill(dim)(random.nextGaussian())))
@@ -238,8 +237,8 @@ class StreamingKMeans(
   /**
    * Return the latest model.
- * @since 1.2.0 */ + @Since("1.2.0") def latestModel(): StreamingKMeansModel = { model } @@ -251,8 +250,8 @@ class StreamingKMeans( * and updates the model using each batch of data from the stream. * * @param data DStream containing vector data - * @since 1.2.0 */ + @Since("1.2.0") def trainOn(data: DStream[Vector]) { assertInitialized() data.foreachRDD { (rdd, time) => @@ -262,8 +261,8 @@ class StreamingKMeans( /** * Java-friendly version of `trainOn`. - * @since 1.4.0 */ + @Since("1.4.0") def trainOn(data: JavaDStream[Vector]): Unit = trainOn(data.dstream) /** @@ -271,8 +270,8 @@ class StreamingKMeans( * * @param data DStream containing vector data * @return DStream containing predictions - * @since 1.2.0 */ + @Since("1.2.0") def predictOn(data: DStream[Vector]): DStream[Int] = { assertInitialized() data.map(model.predict) @@ -280,8 +279,8 @@ class StreamingKMeans( /** * Java-friendly version of `predictOn`. - * @since 1.4.0 */ + @Since("1.4.0") def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Integer] = { JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Integer]]) } @@ -292,8 +291,8 @@ class StreamingKMeans( * @param data DStream containing (key, feature vector) pairs * @tparam K key type * @return DStream containing the input keys and the predictions as values - * @since 1.2.0 */ + @Since("1.2.0") def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Int)] = { assertInitialized() data.mapValues(model.predict) @@ -301,8 +300,8 @@ class StreamingKMeans( /** * Java-friendly version of `predictOnValues`. - * @since 1.4.0 */ + @Since("1.4.0") def predictOnValues[K]( data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Integer] = { implicit val tag = fakeClassTag[K] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala index 486741edd6..76ae847921 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.evaluation -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.Logging import org.apache.spark.SparkContext._ import org.apache.spark.mllib.evaluation.binary._ @@ -41,8 +41,8 @@ import org.apache.spark.sql.DataFrame * of bins may not exactly equal numBins. The last bin in each partition may * be smaller as a result, meaning there may be an extra sample at * partition boundaries. - * @since 1.3.0 */ +@Since("1.3.0") @Experimental class BinaryClassificationMetrics( val scoreAndLabels: RDD[(Double, Double)], @@ -52,8 +52,8 @@ class BinaryClassificationMetrics( /** * Defaults `numBins` to 0. - * @since 1.0.0 */ + @Since("1.0.0") def this(scoreAndLabels: RDD[(Double, Double)]) = this(scoreAndLabels, 0) /** @@ -65,16 +65,16 @@ class BinaryClassificationMetrics( /** * Unpersist intermediate RDDs used in the computation. - * @since 1.0.0 */ + @Since("1.0.0") def unpersist() { cumulativeCounts.unpersist() } /** * Returns thresholds in descending order. - * @since 1.0.0 */ + @Since("1.0.0") def thresholds(): RDD[Double] = cumulativeCounts.map(_._1) /** @@ -82,8 +82,8 @@ class BinaryClassificationMetrics( * which is an RDD of (false positive rate, true positive rate) * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it. 
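The streaming estimator above is normally wired up with the builder pattern from its class scaladoc. A sketch, assuming two DStream[Vector] inputs created elsewhere from a StreamingContext (the streams themselves are not shown):

import org.apache.spark.mllib.clustering.StreamingKMeans
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.streaming.dstream.DStream

// trainingData and testData are assumed DStream[Vector]s; only the wiring is shown.
def wireUp(trainingData: DStream[Vector], testData: DStream[Vector]): Unit = {
  val model = new StreamingKMeans()
    .setK(3)
    .setDecayFactor(1.0)                      // 1.0 keeps all history; see setHalfLife
    .setRandomCenters(dim = 5, weight = 0.0)  // 5-dimensional random initial centers
  model.trainOn(trainingData)                 // model updated once per batch
  model.predictOn(testData).print()           // cluster index per test vector
}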
* @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic - * @since 1.0.0 */ + @Since("1.0.0") def roc(): RDD[(Double, Double)] = { val rocCurve = createCurve(FalsePositiveRate, Recall) val sc = confusions.context @@ -94,16 +94,16 @@ class BinaryClassificationMetrics( /** * Computes the area under the receiver operating characteristic (ROC) curve. - * @since 1.0.0 */ + @Since("1.0.0") def areaUnderROC(): Double = AreaUnderCurve.of(roc()) /** * Returns the precision-recall curve, which is an RDD of (recall, precision), * NOT (precision, recall), with (0.0, 1.0) prepended to it. * @see http://en.wikipedia.org/wiki/Precision_and_recall - * @since 1.0.0 */ + @Since("1.0.0") def pr(): RDD[(Double, Double)] = { val prCurve = createCurve(Recall, Precision) val sc = confusions.context @@ -113,8 +113,8 @@ class BinaryClassificationMetrics( /** * Computes the area under the precision-recall curve. - * @since 1.0.0 */ + @Since("1.0.0") def areaUnderPR(): Double = AreaUnderCurve.of(pr()) /** @@ -122,26 +122,26 @@ class BinaryClassificationMetrics( * @param beta the beta factor in F-Measure computation. * @return an RDD of (threshold, F-Measure) pairs. * @see http://en.wikipedia.org/wiki/F1_score - * @since 1.0.0 */ + @Since("1.0.0") def fMeasureByThreshold(beta: Double): RDD[(Double, Double)] = createCurve(FMeasure(beta)) /** * Returns the (threshold, F-Measure) curve with beta = 1.0. - * @since 1.0.0 */ + @Since("1.0.0") def fMeasureByThreshold(): RDD[(Double, Double)] = fMeasureByThreshold(1.0) /** * Returns the (threshold, precision) curve. - * @since 1.0.0 */ + @Since("1.0.0") def precisionByThreshold(): RDD[(Double, Double)] = createCurve(Precision) /** * Returns the (threshold, recall) curve. - * @since 1.0.0 */ + @Since("1.0.0") def recallByThreshold(): RDD[(Double, Double)] = createCurve(Recall) private lazy val ( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index dddfa3ea5b..02e89d9210 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.evaluation import scala.collection.Map import org.apache.spark.SparkContext._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.linalg.{Matrices, Matrix} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame @@ -30,8 +30,8 @@ import org.apache.spark.sql.DataFrame * Evaluator for multiclass classification. * * @param predictionAndLabels an RDD of (prediction, label) pairs. - * @since 1.1.0 */ +@Since("1.1.0") @Experimental class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { @@ -65,8 +65,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { * predicted classes are in columns, * they are ordered by class label ascending, * as in "labels" - * @since 1.1.0 */ + @Since("1.1.0") def confusionMatrix: Matrix = { val n = labels.size val values = Array.ofDim[Double](n * n) @@ -85,15 +85,15 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { /** * Returns true positive rate for a given label (category) * @param label the label. 
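As a quick sketch of the binary evaluator whose methods are annotated above, with made-up (score, label) pairs and an assumed SparkContext sc:

import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics

val scoreAndLabels = sc.parallelize(Seq(
  (0.9, 1.0), (0.8, 1.0), (0.4, 0.0), (0.2, 1.0), (0.1, 0.0)))

val metrics = new BinaryClassificationMetrics(scoreAndLabels)
println(metrics.areaUnderROC())   // area under the (FPR, TPR) curve
println(metrics.areaUnderPR())    // area under the (recall, precision) curve
val roc = metrics.roc().collect() // includes the prepended (0,0) and appended (1,1)
metrics.unpersist()               // drop the cached cumulative counts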
- * @since 1.1.0 */ + @Since("1.1.0") def truePositiveRate(label: Double): Double = recall(label) /** * Returns false positive rate for a given label (category) * @param label the label. - * @since 1.1.0 */ + @Since("1.1.0") def falsePositiveRate(label: Double): Double = { val fp = fpByClass.getOrElse(label, 0) fp.toDouble / (labelCount - labelCountByClass(label)) @@ -102,8 +102,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { /** * Returns precision for a given label (category) * @param label the label. - * @since 1.1.0 */ + @Since("1.1.0") def precision(label: Double): Double = { val tp = tpByClass(label) val fp = fpByClass.getOrElse(label, 0) @@ -113,16 +113,16 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { /** * Returns recall for a given label (category) * @param label the label. - * @since 1.1.0 */ + @Since("1.1.0") def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label) /** * Returns f-measure for a given label (category) * @param label the label. * @param beta the beta parameter. - * @since 1.1.0 */ + @Since("1.1.0") def fMeasure(label: Double, beta: Double): Double = { val p = precision(label) val r = recall(label) @@ -133,8 +133,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { /** * Returns f1-measure for a given label (category) * @param label the label. - * @since 1.1.0 */ + @Since("1.1.0") def fMeasure(label: Double): Double = fMeasure(label, 1.0) /** @@ -187,8 +187,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { /** * Returns weighted averaged f-measure * @param beta the beta parameter. - * @since 1.1.0 */ + @Since("1.1.0") def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category, count) => fMeasure(category, beta) * count.toDouble / labelCount }.sum diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala index 77cb1e09bd..a0a8d9c568 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala @@ -17,6 +17,7 @@ package org.apache.spark.mllib.evaluation +import org.apache.spark.annotation.Since import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import org.apache.spark.sql.DataFrame @@ -25,8 +26,8 @@ import org.apache.spark.sql.DataFrame * Evaluator for multilabel classification. * @param predictionAndLabels an RDD of (predictions, labels) pairs, * both are non-null Arrays, each with unique elements. - * @since 1.2.0 */ +@Since("1.2.0") class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) { /** @@ -104,8 +105,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])] /** * Returns precision for a given label (category) * @param label the label. - * @since 1.2.0 */ + @Since("1.2.0") def precision(label: Double): Double = { val tp = tpPerClass(label) val fp = fpPerClass.getOrElse(label, 0L) @@ -115,8 +116,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])] /** * Returns recall for a given label (category) * @param label the label. 
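A small worked use of the multiclass evaluator above, with toy (prediction, label) pairs; sc is an assumed SparkContext:

import org.apache.spark.mllib.evaluation.MulticlassMetrics

val predictionAndLabels = sc.parallelize(Seq(
  (0.0, 0.0), (1.0, 1.0), (1.0, 2.0), (2.0, 2.0), (0.0, 1.0)))

val mm = new MulticlassMetrics(predictionAndLabels)
println(mm.confusionMatrix)  // actual classes in rows, predicted classes in columns
mm.labels.foreach { l =>     // per-label precision, recall and F1
  println(s"$l: p=${mm.precision(l)} r=${mm.recall(l)} f1=${mm.fMeasure(l)}")
}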
- * @since 1.2.0 */ + @Since("1.2.0") def recall(label: Double): Double = { val tp = tpPerClass(label) val fn = fnPerClass.getOrElse(label, 0L) @@ -126,8 +127,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])] /** * Returns f1-measure for a given label (category) * @param label the label. - * @since 1.2.0 */ + @Since("1.2.0") def f1Measure(label: Double): Double = { val p = precision(label) val r = recall(label) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index 063fbed8cd..a7f43f0b11 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import scala.reflect.ClassTag import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.{JavaSparkContext, JavaRDD} import org.apache.spark.rdd.RDD @@ -34,8 +34,8 @@ import org.apache.spark.rdd.RDD * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance. * * @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs. - * @since 1.2.0 */ +@Since("1.2.0") @Experimental class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]) extends Logging with Serializable { @@ -56,8 +56,8 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] * * @param k the position to compute the truncated precision, must be positive * @return the average precision at the first k ranking positions - * @since 1.2.0 */ + @Since("1.2.0") def precisionAt(k: Int): Double = { require(k > 0, "ranking position k should be positive") predictionAndLabels.map { case (pred, lab) => @@ -126,8 +126,8 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] * * @param k the position to compute the truncated ndcg, must be positive * @return the average ndcg at the first k ranking positions - * @since 1.2.0 */ + @Since("1.2.0") def ndcgAt(k: Int): Double = { require(k > 0, "ranking position k should be positive") predictionAndLabels.map { case (pred, lab) => @@ -165,8 +165,8 @@ object RankingMetrics { /** * Creates a [[RankingMetrics]] instance (for Java users). * @param predictionAndLabels a JavaRDD of (predicted ranking, ground truth set) pairs - * @since 1.4.0 */ + @Since("1.4.0") def of[E, T <: jl.Iterable[E]](predictionAndLabels: JavaRDD[(T, T)]): RankingMetrics[E] = { implicit val tag = JavaSparkContext.fakeClassTag[E] val rdd = predictionAndLabels.rdd.map { case (predictions, labels) => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala index 54dfd8c099..36a6c357c3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.evaluation -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors @@ -29,8 +29,8 @@ import org.apache.spark.sql.DataFrame * Evaluator for regression. 
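To make the ranking evaluator above concrete, a sketch with two fabricated queries; sc is an assumed SparkContext and the item ids are arbitrary:

import org.apache.spark.mllib.evaluation.RankingMetrics

val rankingData = sc.parallelize(Seq(
  (Array(1, 6, 2, 7, 8), Array(1, 2, 3, 4, 5)),  // (predicted ranking, ground truth set)
  (Array(4, 1, 5, 6, 2), Array(1, 2, 3))))

val rank = new RankingMetrics(rankingData)
println(rank.precisionAt(5))        // truncated precision at the first 5 positions
println(rank.ndcgAt(5))             // truncated NDCG at the first 5 positions
println(rank.meanAveragePrecision)  // MAP over all queries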
* * @param predictionAndObservations an RDD of (prediction, observation) pairs. - * @since 1.2.0 */ +@Since("1.2.0") @Experimental class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging { @@ -67,8 +67,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend * Returns the variance explained by regression. * explainedVariance = \sum_i (\hat{y_i} - \bar{y})^2 / n * @see [[https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained]] - * @since 1.2.0 */ + @Since("1.2.0") def explainedVariance: Double = { SSreg / summary.count } @@ -76,8 +76,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend /** * Returns the mean absolute error, which is a risk function corresponding to the * expected value of the absolute error loss or l1-norm loss. - * @since 1.2.0 */ + @Since("1.2.0") def meanAbsoluteError: Double = { summary.normL1(1) / summary.count } @@ -85,8 +85,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend /** * Returns the mean squared error, which is a risk function corresponding to the * expected value of the squared error loss or quadratic loss. - * @since 1.2.0 */ + @Since("1.2.0") def meanSquaredError: Double = { SSerr / summary.count } @@ -94,8 +94,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend /** * Returns the root mean squared error, which is defined as the square root of * the mean squared error. - * @since 1.2.0 */ + @Since("1.2.0") def rootMeanSquaredError: Double = { math.sqrt(this.meanSquaredError) } @@ -103,8 +103,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend /** * Returns R^2^, the unadjusted coefficient of determination. * @see [[http://en.wikipedia.org/wiki/Coefficient_of_determination]] - * @since 1.2.0 */ + @Since("1.2.0") def r2: Double = { 1 - SSerr / SStot } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 7f4de77044..ba3b447a83 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -20,7 +20,7 @@ import scala.collection.JavaConverters._ import scala.reflect.ClassTag import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.mllib.fpm.AssociationRules.Rule @@ -33,24 +33,22 @@ import org.apache.spark.rdd.RDD * Generates association rules from a [[RDD[FreqItemset[Item]]]. This method only generates * association rules which have a single item as the consequent. * - * @since 1.5.0 */ +@Since("1.5.0") @Experimental class AssociationRules private[fpm] ( private var minConfidence: Double) extends Logging with Serializable { /** * Constructs a default instance with default parameters {minConfidence = 0.8}. - * - * @since 1.5.0 */ + @Since("1.5.0") def this() = this(0.8) /** * Sets the minimal confidence (default: `0.8`). 
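The regression evaluator introduced above can be exercised the same way; the (prediction, observation) pairs below are fabricated and sc is an assumed SparkContext:

import org.apache.spark.mllib.evaluation.RegressionMetrics

val predictionAndObservations = sc.parallelize(Seq(
  (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)))

val reg = new RegressionMetrics(predictionAndObservations)
println(reg.meanAbsoluteError)     // expected l1-norm loss
println(reg.meanSquaredError)      // expected quadratic loss
println(reg.rootMeanSquaredError)  // square root of the MSE
println(reg.r2)                    // unadjusted coefficient of determination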
- * - * @since 1.5.0 */ + @Since("1.5.0") def setMinConfidence(minConfidence: Double): this.type = { require(minConfidence >= 0.0 && minConfidence <= 1.0) this.minConfidence = minConfidence @@ -62,8 +60,8 @@ class AssociationRules private[fpm] ( * @param freqItemsets frequent itemset model obtained from [[FPGrowth]] * @return a [[Set[Rule[Item]]] containing the association rules. * - * @since 1.5.0 */ + @Since("1.5.0") def run[Item: ClassTag](freqItemsets: RDD[FreqItemset[Item]]): RDD[Rule[Item]] = { // For candidate rule X => Y, generate (X, (Y, freq(X union Y))) val candidates = freqItemsets.flatMap { itemset => @@ -102,8 +100,8 @@ object AssociationRules { * instead. * @tparam Item item type * - * @since 1.5.0 */ + @Since("1.5.0") @Experimental class Rule[Item] private[fpm] ( val antecedent: Array[Item], @@ -114,8 +112,8 @@ object AssociationRules { /** * Returns the confidence of the rule. * - * @since 1.5.0 */ + @Since("1.5.0") def confidence: Double = freqUnion.toDouble / freqAntecedent require(antecedent.toSet.intersect(consequent.toSet).isEmpty, { @@ -127,8 +125,8 @@ object AssociationRules { /** * Returns antecedent in a Java List. * - * @since 1.5.0 */ + @Since("1.5.0") def javaAntecedent: java.util.List[Item] = { antecedent.toList.asJava } @@ -136,8 +134,8 @@ object AssociationRules { /** * Returns consequent in a Java List. * - * @since 1.5.0 */ + @Since("1.5.0") def javaConsequent: java.util.List[Item] = { consequent.toList.asJava } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala index e2370a52f4..e37f806271 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import scala.reflect.ClassTag import org.apache.spark.{HashPartitioner, Logging, Partitioner, SparkException} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.mllib.fpm.FPGrowth._ @@ -39,15 +39,15 @@ import org.apache.spark.storage.StorageLevel * @param freqItemsets frequent itemset, which is an RDD of [[FreqItemset]] * @tparam Item item type * - * @since 1.3.0 */ +@Since("1.3.0") @Experimental class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable { /** * Generates association rules for the [[Item]]s in [[freqItemsets]]. * @param confidence minimal confidence of the rules produced - * @since 1.5.0 */ + @Since("1.5.0") def generateAssociationRules(confidence: Double): RDD[AssociationRules.Rule[Item]] = { val associationRules = new AssociationRules(confidence) associationRules.run(freqItemsets) @@ -71,8 +71,8 @@ class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) ex * @see [[http://en.wikipedia.org/wiki/Association_rule_learning Association rule learning * (Wikipedia)]] * - * @since 1.3.0 */ +@Since("1.3.0") @Experimental class FPGrowth private ( private var minSupport: Double, @@ -82,15 +82,15 @@ class FPGrowth private ( * Constructs a default instance with default parameters {minSupport: `0.3`, numPartitions: same * as the input data}. * - * @since 1.3.0 */ + @Since("1.3.0") def this() = this(0.3, -1) /** * Sets the minimal support level (default: `0.3`).
* - * @since 1.3.0 */ + @Since("1.3.0") def setMinSupport(minSupport: Double): this.type = { this.minSupport = minSupport this @@ -99,8 +99,8 @@ class FPGrowth private ( /** * Sets the number of partitions used by parallel FP-growth (default: same as input data). * - * @since 1.3.0 */ + @Since("1.3.0") def setNumPartitions(numPartitions: Int): this.type = { this.numPartitions = numPartitions this @@ -111,8 +111,8 @@ class FPGrowth private ( * @param data input data set, each element contains a transaction * @return an [[FPGrowthModel]] * - * @since 1.3.0 */ + @Since("1.3.0") def run[Item: ClassTag](data: RDD[Array[Item]]): FPGrowthModel[Item] = { if (data.getStorageLevel == StorageLevel.NONE) { logWarning("Input data is not cached.") @@ -213,8 +213,8 @@ class FPGrowth private ( /** * :: Experimental :: * - * @since 1.3.0 */ +@Since("1.3.0") @Experimental object FPGrowth { @@ -224,15 +224,15 @@ object FPGrowth { * @param freq frequency * @tparam Item item type * - * @since 1.3.0 */ + @Since("1.3.0") class FreqItemset[Item](val items: Array[Item], val freq: Long) extends Serializable { /** * Returns items in a Java List. * - * @since 1.3.0 */ + @Since("1.3.0") def javaItems: java.util.List[Item] = { items.toList.asJava } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index dfa8910fcb..28b5b4637b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuilder => MArrayBuilder, HashSet => MHash import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM} -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.sql.catalyst.expressions.GenericMutableRow import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ @@ -227,8 +227,8 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] { * @param values matrix entries in column major if not transposed or in row major otherwise * @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in * row major. 
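Putting the FP-growth and association-rule pieces above together, a sketch over toy transactions; sc is an assumed SparkContext, and the input is cached to avoid the warning logged by run():

import org.apache.spark.mllib.fpm.FPGrowth

val transactions = sc.parallelize(Seq(
  Array("a", "b", "c"), Array("a", "b"), Array("b", "c"), Array("a", "c"))).cache()

val fpModel = new FPGrowth()
  .setMinSupport(0.5)   // keep itemsets present in at least half the transactions
  .setNumPartitions(2)
  .run(transactions)

fpModel.freqItemsets.collect().foreach { is =>
  println(s"${is.items.mkString("[", ",", "]")}: ${is.freq}")
}
// Rules with a single-item consequent, at minimum confidence 0.8.
fpModel.generateAssociationRules(0.8).collect().foreach { r =>
  println(s"${r.antecedent.mkString(",")} => ${r.consequent.mkString(",")} @ ${r.confidence}")
}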
- * @since 1.0.0 */ +@Since("1.0.0") @SQLUserDefinedType(udt = classOf[MatrixUDT]) class DenseMatrix( val numRows: Int, @@ -253,8 +253,8 @@ class DenseMatrix( * @param numRows number of rows * @param numCols number of columns * @param values matrix entries in column major - * @since 1.3.0 */ + @Since("1.3.0") def this(numRows: Int, numCols: Int, values: Array[Double]) = this(numRows, numCols, values, false) @@ -278,9 +278,7 @@ class DenseMatrix( private[mllib] def apply(i: Int): Double = values(i) - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def apply(i: Int, j: Int): Double = values(index(i, j)) private[mllib] def index(i: Int, j: Int): Int = { @@ -291,9 +289,7 @@ class DenseMatrix( values(index(i, j)) = v } - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone()) private[spark] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f), @@ -309,9 +305,7 @@ class DenseMatrix( this } - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed) private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = { @@ -342,21 +336,17 @@ class DenseMatrix( } } - /** - * @since 1.5.0 - */ + @Since("1.5.0") override def numNonzeros: Int = values.count(_ != 0) - /** - * @since 1.5.0 - */ + @Since("1.5.0") override def numActives: Int = values.length /** * Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed * set to false. - * @since 1.3.0 */ + @Since("1.3.0") def toSparse: SparseMatrix = { val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble val colPtrs: Array[Int] = new Array[Int](numCols + 1) @@ -383,8 +373,8 @@ class DenseMatrix( /** * Factory methods for [[org.apache.spark.mllib.linalg.DenseMatrix]]. - * @since 1.3.0 */ +@Since("1.3.0") object DenseMatrix { /** @@ -392,8 +382,8 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros - * @since 1.3.0 */ + @Since("1.3.0") def zeros(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, s"$numRows x $numCols dense matrix is too large to allocate") @@ -405,8 +395,8 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of ones - * @since 1.3.0 */ + @Since("1.3.0") def ones(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, s"$numRows x $numCols dense matrix is too large to allocate") @@ -417,8 +407,8 @@ object DenseMatrix { * Generate an Identity Matrix in `DenseMatrix` format. 
* @param n number of rows and columns of the matrix * @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal - * @since 1.3.0 */ + @Since("1.3.0") def eye(n: Int): DenseMatrix = { val identity = DenseMatrix.zeros(n, n) var i = 0 @@ -435,8 +425,8 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1) - * @since 1.3.0 */ + @Since("1.3.0") def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, s"$numRows x $numCols dense matrix is too large to allocate") @@ -449,8 +439,8 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1) - * @since 1.3.0 */ + @Since("1.3.0") def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, s"$numRows x $numCols dense matrix is too large to allocate") @@ -462,8 +452,8 @@ object DenseMatrix { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `DenseMatrix` with size `values.length` x `values.length` and `values` * on the diagonal - * @since 1.3.0 */ + @Since("1.3.0") def diag(vector: Vector): DenseMatrix = { val n = vector.size val matrix = DenseMatrix.zeros(n, n) @@ -498,8 +488,8 @@ object DenseMatrix { * @param isTransposed whether the matrix is transposed. If true, the matrix can be considered * Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs, * and `rowIndices` behave as colIndices, and `values` are stored in row major. - * @since 1.2.0 */ +@Since("1.2.0") @SQLUserDefinedType(udt = classOf[MatrixUDT]) class SparseMatrix( val numRows: Int, @@ -536,8 +526,8 @@ class SparseMatrix( * @param rowIndices the row index of the entry. They must be in strictly increasing * order for each column * @param values non-zero matrix entries in column major - * @since 1.3.0 */ + @Since("1.3.0") def this( numRows: Int, numCols: Int, @@ -560,8 +550,8 @@ class SparseMatrix( } /** - * @since 1.3.0 */ + @Since("1.3.0") override def apply(i: Int, j: Int): Double = { val ind = index(i, j) if (ind < 0) 0.0 else values(ind) @@ -585,9 +575,7 @@ class SparseMatrix( } } - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def copy: SparseMatrix = { new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone()) } @@ -605,9 +593,7 @@ class SparseMatrix( this } - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def transpose: SparseMatrix = new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed) @@ -641,28 +627,24 @@ class SparseMatrix( /** * Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed * set to false. - * @since 1.3.0 */ + @Since("1.3.0") def toDense: DenseMatrix = { new DenseMatrix(numRows, numCols, toArray) } - /** - * @since 1.5.0 - */ + @Since("1.5.0") override def numNonzeros: Int = values.count(_ != 0) - /** - * @since 1.5.0 - */ + @Since("1.5.0") override def numActives: Int = values.length } /** * Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]]. 
- * @since 1.3.0 */ +@Since("1.3.0") object SparseMatrix { /** @@ -673,8 +655,8 @@ object SparseMatrix { * @param numCols number of columns of the matrix * @param entries Array of (i, j, value) tuples * @return The corresponding `SparseMatrix` - * @since 1.3.0 */ + @Since("1.3.0") def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = { val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1)) val numEntries = sortedEntries.size @@ -722,8 +704,8 @@ object SparseMatrix { * Generate an Identity Matrix in `SparseMatrix` format. * @param n number of rows and columns of the matrix * @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal - * @since 1.3.0 */ + @Since("1.3.0") def speye(n: Int): SparseMatrix = { new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0)) } @@ -792,8 +774,8 @@ object SparseMatrix { * @param density the desired density for the matrix * @param rng a random number generator * @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1) - * @since 1.3.0 */ + @Since("1.3.0") def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { val mat = genRandMatrix(numRows, numCols, density, rng) mat.update(i => rng.nextDouble()) @@ -806,8 +788,8 @@ object SparseMatrix { * @param density the desired density for the matrix * @param rng a random number generator * @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1) - * @since 1.3.0 */ + @Since("1.3.0") def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { val mat = genRandMatrix(numRows, numCols, density, rng) mat.update(i => rng.nextGaussian()) @@ -818,8 +800,8 @@ object SparseMatrix { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero * `values` on the diagonal - * @since 1.3.0 */ + @Since("1.3.0") def spdiag(vector: Vector): SparseMatrix = { val n = vector.size vector match { @@ -835,8 +817,8 @@ object SparseMatrix { /** * Factory methods for [[org.apache.spark.mllib.linalg.Matrix]]. 
- * @since 1.0.0 */ +@Since("1.0.0") object Matrices { /** @@ -845,8 +827,8 @@ object Matrices { * @param numRows number of rows * @param numCols number of columns * @param values matrix entries in column major - * @since 1.0.0 */ + @Since("1.0.0") def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = { new DenseMatrix(numRows, numCols, values) } @@ -859,8 +841,8 @@ object Matrices { * @param colPtrs the index corresponding to the start of a new column * @param rowIndices the row index of the entry * @param values non-zero matrix entries in column major - * @since 1.2.0 */ + @Since("1.2.0") def sparse( numRows: Int, numCols: Int, @@ -893,8 +875,8 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of zeros - * @since 1.2.0 */ + @Since("1.2.0") def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols) /** @@ -902,24 +884,24 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of ones - * @since 1.2.0 */ + @Since("1.2.0") def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols) /** * Generate a dense Identity Matrix in `Matrix` format. * @param n number of rows and columns of the matrix * @return `Matrix` with size `n` x `n` and values of ones on the diagonal - * @since 1.2.0 */ + @Since("1.2.0") def eye(n: Int): Matrix = DenseMatrix.eye(n) /** * Generate a sparse Identity Matrix in `Matrix` format. * @param n number of rows and columns of the matrix * @return `Matrix` with size `n` x `n` and values of ones on the diagonal - * @since 1.3.0 */ + @Since("1.3.0") def speye(n: Int): Matrix = SparseMatrix.speye(n) /** @@ -928,8 +910,8 @@ object Matrices { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1) - * @since 1.2.0 */ + @Since("1.2.0") def rand(numRows: Int, numCols: Int, rng: Random): Matrix = DenseMatrix.rand(numRows, numCols, rng) @@ -940,8 +922,8 @@ object Matrices { * @param density the desired density for the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1) - * @since 1.3.0 */ + @Since("1.3.0") def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = SparseMatrix.sprand(numRows, numCols, density, rng) @@ -951,8 +933,8 @@ object Matrices { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1) - * @since 1.2.0 */ + @Since("1.2.0") def randn(numRows: Int, numCols: Int, rng: Random): Matrix = DenseMatrix.randn(numRows, numCols, rng) @@ -963,8 +945,8 @@ object Matrices { * @param density the desired density for the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1) - * @since 1.3.0 */ + @Since("1.3.0") def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = SparseMatrix.sprandn(numRows, numCols, density, rng) @@ -973,8 +955,8 @@ object Matrices { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `Matrix` with size `values.length` x `values.length` and `values` * on the diagonal - * @since 1.2.0 */ + @Since("1.2.0") def diag(vector: 
Vector): Matrix = DenseMatrix.diag(vector) /** @@ -983,8 +965,8 @@ object Matrices { * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned. * @param matrices array of matrices * @return a single `Matrix` composed of the matrices that were horizontally concatenated - * @since 1.3.0 */ + @Since("1.3.0") def horzcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { return new DenseMatrix(0, 0, Array[Double]()) @@ -1042,8 +1024,8 @@ object Matrices { * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned. * @param matrices array of matrices * @return a single `Matrix` composed of the matrices that were vertically concatenated - * @since 1.3.0 */ + @Since("1.3.0") def vertcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { return new DenseMatrix(0, 0, Array[Double]()) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala index 8f504f6984..a37aca99d5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.linalg -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} /** * :: Experimental :: * Represents singular value decomposition (SVD) factors. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 52ef7be3b3..3d577edbe2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -26,7 +26,7 @@ import scala.collection.JavaConverters._ import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV} import org.apache.spark.SparkException -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.{AlphaComponent, Since} import org.apache.spark.mllib.util.NumericParser import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericMutableRow @@ -240,14 +240,14 @@ class VectorUDT extends UserDefinedType[Vector] { * Factory methods for [[org.apache.spark.mllib.linalg.Vector]]. * We don't use the name `Vector` because Scala imports * [[scala.collection.immutable.Vector]] by default. - * @since 1.0.0 */ +@Since("1.0.0") object Vectors { /** * Creates a dense vector from its values. - * @since 1.0.0 */ + @Since("1.0.0") @varargs def dense(firstValue: Double, otherValues: Double*): Vector = new DenseVector((firstValue +: otherValues).toArray) @@ -255,8 +255,8 @@ object Vectors { // A dummy implicit is used to avoid signature collision with the one generated by @varargs. /** * Creates a dense vector from a double array. - * @since 1.0.0 */ + @Since("1.0.0") def dense(values: Array[Double]): Vector = new DenseVector(values) /** @@ -265,8 +265,8 @@ object Vectors { * @param size vector size. * @param indices index array, must be strictly increasing. * @param values value array, must have the same length as indices. 
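The factory methods annotated above compose as follows; a sketch with small literal matrices:

import org.apache.spark.mllib.linalg.{Matrices, Vectors}

// 3 x 2 dense matrix in column major: [[1,4],[2,5],[3,6]]
val dm = Matrices.dense(3, 2, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
// 3 x 2 CSC sparse matrix with entries (0,0) = 9 and (2,1) = 8
val sm = Matrices.sparse(3, 2, Array(0, 1, 2), Array(0, 2), Array(9.0, 8.0))

val id   = Matrices.eye(3)                         // dense identity
val diag = Matrices.diag(Vectors.dense(1.0, 2.0))  // 2 x 2 diagonal
val wide = Matrices.horzcat(Array(dm, sm))         // 3 x 4 concatenation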
- * @since 1.0.0 */ + @Since("1.0.0") def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector = new SparseVector(size, indices, values) @@ -275,8 +275,8 @@ object Vectors { * * @param size vector size. * @param elements vector elements in (index, value) pairs. - * @since 1.0.0 */ + @Since("1.0.0") def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = { require(size > 0, "The size of the requested sparse vector must be greater than 0.") @@ -297,8 +297,8 @@ object Vectors { * * @param size vector size. * @param elements vector elements in (index, value) pairs. - * @since 1.0.0 */ + @Since("1.0.0") def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = { sparse(size, elements.asScala.map { case (i, x) => (i.intValue(), x.doubleValue()) @@ -310,16 +310,16 @@ object Vectors { * * @param size vector size * @return a zero vector - * @since 1.1.0 */ + @Since("1.1.0") def zeros(size: Int): Vector = { new DenseVector(new Array[Double](size)) } /** * Parses a string resulted from [[Vector.toString]] into a [[Vector]]. - * @since 1.1.0 */ + @Since("1.1.0") def parse(s: String): Vector = { parseNumeric(NumericParser.parse(s)) } @@ -362,8 +362,8 @@ object Vectors { * @param vector input vector. * @param p norm. * @return norm in L^p^ space. - * @since 1.3.0 */ + @Since("1.3.0") def norm(vector: Vector, p: Double): Double = { require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " + s"You specified p=$p.") @@ -415,8 +415,8 @@ object Vectors { * @param v1 first Vector. * @param v2 second Vector. * @return squared distance between two Vectors. - * @since 1.3.0 */ + @Since("1.3.0") def sqdist(v1: Vector, v2: Vector): Double = { require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" + s"=${v2.size}.") @@ -529,33 +529,25 @@ object Vectors { /** * A dense vector represented by a value array. - * @since 1.0.0 */ +@Since("1.0.0") @SQLUserDefinedType(udt = classOf[VectorUDT]) class DenseVector(val values: Array[Double]) extends Vector { - /** - * @since 1.0.0 - */ + @Since("1.0.0") override def size: Int = values.length override def toString: String = values.mkString("[", ",", "]") - /** - * @since 1.0.0 - */ + @Since("1.0.0") override def toArray: Array[Double] = values private[spark] override def toBreeze: BV[Double] = new BDV[Double](values) - /** - * @since 1.0.0 - */ + @Since("1.0.0") override def apply(i: Int): Double = values(i) - /** - * @since 1.1.0 - */ + @Since("1.1.0") override def copy: DenseVector = { new DenseVector(values.clone()) } @@ -587,14 +579,10 @@ class DenseVector(val values: Array[Double]) extends Vector { result } - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def numActives: Int = size - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def numNonzeros: Int = { // same as values.count(_ != 0.0) but faster var nnz = 0 @@ -606,9 +594,7 @@ class DenseVector(val values: Array[Double]) extends Vector { nnz } - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def toSparse: SparseVector = { val nnz = numNonzeros val ii = new Array[Int](nnz) @@ -624,9 +610,7 @@ class DenseVector(val values: Array[Double]) extends Vector { new SparseVector(size, ii, vv) } - /** - * @since 1.5.0 - */ + @Since("1.5.0") override def argmax: Int = { if (size == 0) { -1 @@ -646,9 +630,7 @@ class DenseVector(val values: Array[Double]) extends Vector { } } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object DenseVector { /** Extracts the value array from a dense vector. 
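For the vector factories just shown, a brief sketch of equivalent dense and sparse constructions:

import org.apache.spark.mllib.linalg.Vectors

val dv  = Vectors.dense(1.0, 0.0, 3.0)
val sv  = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))  // same vector, sparse form
val sv2 = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0)))       // from (index, value) pairs

println(Vectors.norm(dv, 2.0))   // L2 norm, sqrt(10)
println(Vectors.sqdist(dv, sv))  // 0.0, identical contents
println(dv.toSparse.numActives)  // 2, the explicit zero is dropped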
*/ def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values) @@ -660,8 +642,8 @@ object DenseVector { * @param size size of the vector. * @param indices index array, assume to be strictly increasing. * @param values value array, must have the same length as the index array. - * @since 1.0.0 */ +@Since("1.0.0") @SQLUserDefinedType(udt = classOf[VectorUDT]) class SparseVector( override val size: Int, @@ -677,9 +659,7 @@ class SparseVector( override def toString: String = s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})" - /** - * @since 1.0.0 - */ + @Since("1.0.0") override def toArray: Array[Double] = { val data = new Array[Double](size) var i = 0 @@ -691,9 +671,7 @@ class SparseVector( data } - /** - * @since 1.1.0 - */ + @Since("1.1.0") override def copy: SparseVector = { new SparseVector(size, indices.clone(), values.clone()) } @@ -734,14 +712,10 @@ class SparseVector( result } - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def numActives: Int = values.length - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def numNonzeros: Int = { var nnz = 0 values.foreach { v => @@ -752,9 +726,7 @@ class SparseVector( nnz } - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def toSparse: SparseVector = { val nnz = numNonzeros if (nnz == numActives) { @@ -774,9 +746,7 @@ class SparseVector( } } - /** - * @since 1.5.0 - */ + @Since("1.5.0") override def argmax: Int = { if (size == 0) { -1 @@ -847,9 +817,7 @@ class SparseVector( } } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object SparseVector { def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] = Some((sv.size, sv.indices, sv.values)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index cfb6680a18..94376c24a7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import breeze.linalg.{DenseMatrix => BDM} import org.apache.spark.{Logging, Partitioner, SparkException} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Matrix, SparseMatrix} import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel @@ -128,9 +128,8 @@ private[mllib] object GridPartitioner { * the number of rows will be calculated when `numRows` is invoked. * @param nCols Number of columns of this matrix. If the supplied value is less than or equal to * zero, the number of columns will be calculated when `numCols` is invoked. - * @since 1.3.0 - * */ +@Since("1.3.0") @Experimental class BlockMatrix( val blocks: RDD[((Int, Int), Matrix)], @@ -151,10 +150,8 @@ class BlockMatrix( * rows are not required to have the given number of rows * @param colsPerBlock Number of columns that make up each block. 
The blocks forming the final * columns are not required to have the given number of columns - * - * @since 1.3.0 - * */ + @Since("1.3.0") def this( blocks: RDD[((Int, Int), Matrix)], rowsPerBlock: Int, @@ -162,20 +159,13 @@ class BlockMatrix( this(blocks, rowsPerBlock, colsPerBlock, 0L, 0L) } - /** - * @since 1.3.0 - * */ - + @Since("1.3.0") override def numRows(): Long = { if (nRows <= 0L) estimateDim() nRows } - /** - * - * @since 1.3.0 - */ - + @Since("1.3.0") override def numCols(): Long = { if (nCols <= 0L) estimateDim() nCols @@ -206,8 +196,8 @@ class BlockMatrix( /** * Validates the block matrix info against the matrix data (`blocks`) and throws an exception if * any error is found. - * @since 1.3.0 */ + @Since("1.3.0") def validate(): Unit = { logDebug("Validating BlockMatrix...") // check if the matrix is larger than the claimed dimensions @@ -243,25 +233,22 @@ class BlockMatrix( logDebug("BlockMatrix is valid!") } - /** Caches the underlying RDD. - * @since 1.3.0 - * */ + /** Caches the underlying RDD. */ + @Since("1.3.0") def cache(): this.type = { blocks.cache() this } - /** Persists the underlying RDD with the specified storage level. - * @since 1.3.0 - * */ + /** Persists the underlying RDD with the specified storage level. */ + @Since("1.3.0") def persist(storageLevel: StorageLevel): this.type = { blocks.persist(storageLevel) this } - /** Converts to CoordinateMatrix. - * @since 1.3.0 - * */ + /** Converts to CoordinateMatrix. */ + @Since("1.3.0") def toCoordinateMatrix(): CoordinateMatrix = { val entryRDD = blocks.flatMap { case ((blockRowIndex, blockColIndex), mat) => val rowStart = blockRowIndex.toLong * rowsPerBlock @@ -275,9 +262,8 @@ class BlockMatrix( new CoordinateMatrix(entryRDD, numRows(), numCols()) } - /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. - * @since 1.3.0 - * */ + /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */ + @Since("1.3.0") def toIndexedRowMatrix(): IndexedRowMatrix = { require(numCols() < Int.MaxValue, "The number of columns must be within the integer range. " + s"numCols: ${numCols()}") @@ -285,9 +271,8 @@ class BlockMatrix( toCoordinateMatrix().toIndexedRowMatrix() } - /** Collect the distributed matrix on the driver as a `DenseMatrix`. - * @since 1.3.0 - * */ + /** Collect the distributed matrix on the driver as a `DenseMatrix`. */ + @Since("1.3.0") def toLocalMatrix(): Matrix = { require(numRows() < Int.MaxValue, "The number of rows of this matrix should be less than " + s"Int.MaxValue. Currently numRows: ${numRows()}") @@ -312,11 +297,11 @@ class BlockMatrix( new DenseMatrix(m, n, values) } - /** Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the - * same underlying data. Is a lazy operation. - * @since 1.3.0 - * - * */ + /** + * Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the + * same underlying data. Is a lazy operation. + */ + @Since("1.3.0") def transpose: BlockMatrix = { val transposedBlocks = blocks.map { case ((blockRowIndex, blockColIndex), mat) => ((blockColIndex, blockRowIndex), mat.transpose) @@ -330,13 +315,14 @@ class BlockMatrix( new BDM[Double](localMat.numRows, localMat.numCols, localMat.toArray) } - /** Adds two block matrices together. The matrices must have the same size and matching - * `rowsPerBlock` and `colsPerBlock` values. 
If one of the blocks that are being added are - instances of [[SparseMatrix]], the resulting sub matrix will also be a [[SparseMatrix]], even - if it is being added to a [[DenseMatrix]]. If two dense matrices are added, the output will - also be a [[DenseMatrix]]. - * @since 1.3.0 - */ + /** + * Adds two block matrices together. The matrices must have the same size and matching + * `rowsPerBlock` and `colsPerBlock` values. If one of the blocks being added is an + * instance of [[SparseMatrix]], the resulting sub-matrix will also be a [[SparseMatrix]], even + * if it is being added to a [[DenseMatrix]]. If two dense matrices are added, the output will + * also be a [[DenseMatrix]]. + */ + @Since("1.3.0") def add(other: BlockMatrix): BlockMatrix = { require(numRows() == other.numRows(), "Both matrices must have the same number of rows. " + s"A.numRows: ${numRows()}, B.numRows: ${other.numRows()}") @@ -364,14 +350,14 @@ class BlockMatrix( } } - /** Left multiplies this [[BlockMatrix]] to `other`, another [[BlockMatrix]]. The `colsPerBlock` - * of this matrix must equal the `rowsPerBlock` of `other`. If `other` contains - * [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output - * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause - * some performance issues until support for multiplying two sparse matrices is added. - * - * @since 1.3.0 - */ + /** + * Left multiplies this [[BlockMatrix]] to `other`, another [[BlockMatrix]]. The `colsPerBlock` + * of this matrix must equal the `rowsPerBlock` of `other`. If `other` contains + * [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output + * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause + * some performance issues until support for multiplying two sparse matrices is added. + */ + @Since("1.3.0") def multiply(other: BlockMatrix): BlockMatrix = { require(numCols() == other.numRows(), "The number of columns of A and the number of rows " + s"of B must be equal. A.numCols: ${numCols()}, B.numRows: ${other.numRows()}. If you " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 2b751e45dd..4bb27ec840 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.linalg.distributed import breeze.linalg.{DenseMatrix => BDM} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} @@ -29,8 +29,8 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} * @param i row index * @param j column index * @param value value of the entry - * @since 1.0.0 */ +@Since("1.0.0") @Experimental case class MatrixEntry(i: Long, j: Long, value: Double) @@ -43,22 +43,20 @@ case class MatrixEntry(i: Long, j: Long, value: Double) * @param nRows number of rows. A non-positive value means unknown, and then the number of rows will * be determined by the max row index plus one. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the max column index plus one.
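A condensed sketch of the BlockMatrix operations covered above, built from a handful of fabricated entries; sc is an assumed SparkContext and the sizes are deliberately tiny:

import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}

val blockEntries = sc.parallelize(Seq(
  MatrixEntry(0, 0, 1.0), MatrixEntry(1, 1, 2.0), MatrixEntry(2, 0, 3.0)))

val a: BlockMatrix = new CoordinateMatrix(blockEntries).toBlockMatrix(2, 2).cache()
a.validate()                       // checks block dimensions against the claimed size

val ata  = a.transpose.multiply(a) // output blocks are dense, per the note above
val twoA = a.add(a)
println(ata.toLocalMatrix())       // driver-side collect; only for small matrices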
- * @since 1.0.0 */ +@Since("1.0.0") @Experimental class CoordinateMatrix( val entries: RDD[MatrixEntry], private var nRows: Long, private var nCols: Long) extends DistributedMatrix { - /** Alternative constructor leaving matrix dimensions to be determined automatically. - * @since 1.0.0 - * */ + /** Alternative constructor leaving matrix dimensions to be determined automatically. */ + @Since("1.0.0") def this(entries: RDD[MatrixEntry]) = this(entries, 0L, 0L) - /** Gets or computes the number of columns. - * @since 1.0.0 - * */ + /** Gets or computes the number of columns. */ + @Since("1.0.0") override def numCols(): Long = { if (nCols <= 0L) { computeSize() @@ -66,9 +64,8 @@ class CoordinateMatrix( nCols } - /** Gets or computes the number of rows. - * @since 1.0.0 - * */ + /** Gets or computes the number of rows. */ + @Since("1.0.0") override def numRows(): Long = { if (nRows <= 0L) { computeSize() @@ -76,16 +73,14 @@ class CoordinateMatrix( nRows } - /** Transposes this CoordinateMatrix. - * @since 1.3.0 - * */ + /** Transposes this CoordinateMatrix. */ + @Since("1.3.0") def transpose(): CoordinateMatrix = { new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows()) } - /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. - * @since 1.0.0 - * */ + /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */ + @Since("1.0.0") def toIndexedRowMatrix(): IndexedRowMatrix = { val nl = numCols() if (nl > Int.MaxValue) { @@ -104,15 +99,14 @@ class CoordinateMatrix( /** * Converts to RowMatrix, dropping row indices after grouping by row index. * The number of columns must be within the integer range. - * @since 1.0.0 */ + @Since("1.0.0") def toRowMatrix(): RowMatrix = { toIndexedRowMatrix().toRowMatrix() } - /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. - * @since 1.3.0 - * */ + /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */ + @Since("1.3.0") def toBlockMatrix(): BlockMatrix = { toBlockMatrix(1024, 1024) } @@ -124,8 +118,8 @@ class CoordinateMatrix( * @param colsPerBlock The number of columns of each block. The blocks at the right edge may have * a smaller value. Must be an integer value greater than 0. * @return a [[BlockMatrix]] - * @since 1.3.0 */ + @Since("1.3.0") def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = { require(rowsPerBlock > 0, s"rowsPerBlock needs to be greater than 0. rowsPerBlock: $rowsPerBlock") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala index 98e90af84a..e51327ebb7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala @@ -19,10 +19,12 @@ package org.apache.spark.mllib.linalg.distributed import breeze.linalg.{DenseMatrix => BDM} +import org.apache.spark.annotation.Since + /** * Represents a distributively stored matrix backed by one or more RDDs. - * @since 1.0.0 */ +@Since("1.0.0") trait DistributedMatrix extends Serializable { /** Gets or computes the number of rows. 
*/ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index a09f88ce28..6d2c05a47d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.linalg.distributed import breeze.linalg.{DenseMatrix => BDM} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.linalg.SingularValueDecomposition @@ -27,8 +27,8 @@ import org.apache.spark.mllib.linalg.SingularValueDecomposition /** * :: Experimental :: * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]]. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental case class IndexedRow(index: Long, vector: Vector) @@ -42,23 +42,19 @@ case class IndexedRow(index: Long, vector: Vector) * be determined by the max row index plus one. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the size of the first row. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class IndexedRowMatrix( val rows: RDD[IndexedRow], private var nRows: Long, private var nCols: Int) extends DistributedMatrix { - /** Alternative constructor leaving matrix dimensions to be determined automatically. - * @since 1.0.0 - * */ + /** Alternative constructor leaving matrix dimensions to be determined automatically. */ + @Since("1.0.0") def this(rows: RDD[IndexedRow]) = this(rows, 0L, 0) - /** - * - * @since 1.0.0 - */ + @Since("1.0.0") override def numCols(): Long = { if (nCols <= 0) { // Calling `first` will throw an exception if `rows` is empty. @@ -67,10 +63,7 @@ class IndexedRowMatrix( nCols } - /** - * - * @since 1.0.0 - */ + @Since("1.0.0") override def numRows(): Long = { if (nRows <= 0L) { // Reduce will throw an exception if `rows` is empty. @@ -82,15 +75,14 @@ class IndexedRowMatrix( /** * Drops row indices and converts this matrix to a * [[org.apache.spark.mllib.linalg.distributed.RowMatrix]]. - * @since 1.0.0 */ + @Since("1.0.0") def toRowMatrix(): RowMatrix = { new RowMatrix(rows.map(_.vector), 0L, nCols) } - /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. - * @since 1.3.0 - * */ + /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */ + @Since("1.3.0") def toBlockMatrix(): BlockMatrix = { toBlockMatrix(1024, 1024) } @@ -102,8 +94,8 @@ class IndexedRowMatrix( * @param colsPerBlock The number of columns of each block. The blocks at the right edge may have * a smaller value. Must be an integer value greater than 0. * @return a [[BlockMatrix]] - * @since 1.3.0 */ + @Since("1.3.0") def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = { // TODO: This implementation may be optimized toCoordinateMatrix().toBlockMatrix(rowsPerBlock, colsPerBlock) @@ -112,8 +104,8 @@ class IndexedRowMatrix( /** * Converts this matrix to a * [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]]. - * @since 1.3.0 */ + @Since("1.3.0") def toCoordinateMatrix(): CoordinateMatrix = { val entries = rows.flatMap { row => val rowIndex = row.index @@ -149,8 +141,8 @@ class IndexedRowMatrix( * @param rCond the reciprocal condition number. 
All singular values smaller than rCond * sigma(0) * are treated as zero, where sigma(0) is the largest singular value. * @return SingularValueDecomposition(U, s, V) - * @since 1.0.0 */ + @Since("1.0.0") def computeSVD( k: Int, computeU: Boolean = false, @@ -176,8 +168,8 @@ class IndexedRowMatrix( * * @param B a local matrix whose number of rows must match the number of columns of this matrix * @return an IndexedRowMatrix representing the product, which preserves partitioning - * @since 1.0.0 */ + @Since("1.0.0") def multiply(B: Matrix): IndexedRowMatrix = { val mat = toRowMatrix().multiply(B) val indexedRows = rows.map(_.index).zip(mat.rows).map { case (i, v) => @@ -188,8 +180,8 @@ class IndexedRowMatrix( /** * Computes the Gramian matrix `A^T A`. - * @since 1.0.0 */ + @Since("1.0.0") def computeGramianMatrix(): Matrix = { toRowMatrix().computeGramianMatrix() } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index b2e94f2dd6..78036eba5c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -28,7 +28,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.Logging import org.apache.spark.SparkContext._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.stat.{MultivariateOnlineSummarizer, MultivariateStatisticalSummary} import org.apache.spark.rdd.RDD @@ -44,22 +44,20 @@ import org.apache.spark.storage.StorageLevel * be determined by the number of records in the RDD `rows`. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the size of the first row. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class RowMatrix( val rows: RDD[Vector], private var nRows: Long, private var nCols: Int) extends DistributedMatrix with Logging { - /** Alternative constructor leaving matrix dimensions to be determined automatically. - * @since 1.0.0 - * */ + /** Alternative constructor leaving matrix dimensions to be determined automatically. */ + @Since("1.0.0") def this(rows: RDD[Vector]) = this(rows, 0L, 0) - /** Gets or computes the number of columns. - * @since 1.0.0 - * */ + /** Gets or computes the number of columns. */ + @Since("1.0.0") override def numCols(): Long = { if (nCols <= 0) { try { @@ -74,9 +72,8 @@ class RowMatrix( nCols } - /** Gets or computes the number of rows. - * @since 1.0.0 - * */ + /** Gets or computes the number of rows. */ + @Since("1.0.0") override def numRows(): Long = { if (nRows <= 0L) { nRows = rows.count() @@ -114,8 +111,8 @@ class RowMatrix( /** * Computes the Gramian matrix `A^T A`. - * @since 1.0.0 */ + @Since("1.0.0") def computeGramianMatrix(): Matrix = { val n = numCols().toInt checkNumColumns(n) @@ -185,8 +182,8 @@ class RowMatrix( * @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0) * are treated as zero, where sigma(0) is the largest singular value. * @return SingularValueDecomposition(U, s, V). U = null if computeU = false. - * @since 1.0.0 */ + @Since("1.0.0") def computeSVD( k: Int, computeU: Boolean = false, @@ -326,8 +323,8 @@ class RowMatrix( /** * Computes the covariance matrix, treating each row as an observation. 
* @return a local dense matrix of size n x n - * @since 1.0.0 */ + @Since("1.0.0") def computeCovariance(): Matrix = { val n = numCols().toInt checkNumColumns(n) @@ -380,8 +377,8 @@ class RowMatrix( * * @param k number of top principal components. * @return a matrix of size n-by-k, whose columns are principal components - * @since 1.0.0 */ + @Since("1.0.0") def computePrincipalComponents(k: Int): Matrix = { val n = numCols().toInt require(k > 0 && k <= n, s"k = $k out of range (0, n = $n]") @@ -399,8 +396,8 @@ class RowMatrix( /** * Computes column-wise summary statistics. - * @since 1.0.0 */ + @Since("1.0.0") def computeColumnSummaryStatistics(): MultivariateStatisticalSummary = { val summary = rows.treeAggregate(new MultivariateOnlineSummarizer)( (aggregator, data) => aggregator.add(data), @@ -415,8 +412,8 @@ class RowMatrix( * @param B a local matrix whose number of rows must match the number of columns of this matrix * @return a [[org.apache.spark.mllib.linalg.distributed.RowMatrix]] representing the product, * which preserves partitioning - * @since 1.0.0 */ + @Since("1.0.0") def multiply(B: Matrix): RowMatrix = { val n = numCols().toInt val k = B.numCols @@ -448,8 +445,8 @@ class RowMatrix( * * @return An n x n sparse upper-triangular matrix of cosine similarities between * columns of this matrix. - * @since 1.2.0 */ + @Since("1.2.0") def columnSimilarities(): CoordinateMatrix = { columnSimilarities(0.0) } @@ -492,8 +489,8 @@ class RowMatrix( * with the cost vs estimate quality trade-off described above. * @return An n x n sparse upper-triangular matrix of cosine similarities * between columns of this matrix. - * @since 1.2.0 */ + @Since("1.2.0") def columnSimilarities(threshold: Double): CoordinateMatrix = { require(threshold >= 0, s"Threshold cannot be negative: $threshold") @@ -671,9 +668,7 @@ class RowMatrix( } } -/** - * @since 1.0.0 - */ +@Since("1.0.0") @Experimental object RowMatrix { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 56c549ef99..b27ef1b949 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.recommendation import org.apache.spark.Logging -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.ml.recommendation.{ALS => NewALS} import org.apache.spark.rdd.RDD @@ -26,8 +26,8 @@ import org.apache.spark.storage.StorageLevel /** * A more compact class to represent a rating than Tuple3[Int, Int, Double]. - * @since 0.8.0 */ +@Since("0.8.0") case class Rating(user: Int, product: Int, rating: Double) /** @@ -255,8 +255,8 @@ class ALS private ( /** * Top-level methods for calling Alternating Least Squares (ALS) matrix factorization. 
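A similar sketch for the RowMatrix statistics methods annotated above, again assuming `sc` and illustrative data:

{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix

val rows = sc.parallelize(Seq(
  Vectors.dense(1.0, 2.0, 3.0),
  Vectors.dense(4.0, 5.0, 6.0),
  Vectors.dense(7.0, 9.0, 8.0)))
val mat = new RowMatrix(rows)

val cov = mat.computeCovariance()           // local n x n covariance matrix
val pc = mat.computePrincipalComponents(2)  // n x 2 matrix of principal components
val summary = mat.computeColumnSummaryStatistics()
println(summary.mean)                       // column-wise means

// Sparse upper-triangular matrix of cosine similarities between columns.
val sims = mat.columnSimilarities()
}}}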
- * @since 0.8.0 */ +@Since("0.8.0") object ALS { /** * Train a matrix factorization model given an RDD of ratings given by users to some products, @@ -271,8 +271,8 @@ object ALS { * @param lambda regularization factor (recommended: 0.01) * @param blocks level of parallelism to split computation into * @param seed random seed - * @since 0.9.1 */ + @Since("0.9.1") def train( ratings: RDD[Rating], rank: Int, @@ -296,8 +296,8 @@ object ALS { * @param iterations number of iterations of ALS (recommended: 10-20) * @param lambda regularization factor (recommended: 0.01) * @param blocks level of parallelism to split computation into - * @since 0.8.0 */ + @Since("0.8.0") def train( ratings: RDD[Rating], rank: Int, @@ -319,8 +319,8 @@ object ALS { * @param rank number of features to use * @param iterations number of iterations of ALS (recommended: 10-20) * @param lambda regularization factor (recommended: 0.01) - * @since 0.8.0 */ + @Since("0.8.0") def train(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double) : MatrixFactorizationModel = { train(ratings, rank, iterations, lambda, -1) @@ -336,8 +336,8 @@ object ALS { * @param ratings RDD of (userID, productID, rating) pairs * @param rank number of features to use * @param iterations number of iterations of ALS (recommended: 10-20) - * @since 0.8.0 */ + @Since("0.8.0") def train(ratings: RDD[Rating], rank: Int, iterations: Int) : MatrixFactorizationModel = { train(ratings, rank, iterations, 0.01, -1) @@ -357,8 +357,8 @@ object ALS { * @param blocks level of parallelism to split computation into * @param alpha confidence parameter * @param seed random seed - * @since 0.8.1 */ + @Since("0.8.1") def trainImplicit( ratings: RDD[Rating], rank: Int, @@ -384,8 +384,8 @@ object ALS { * @param lambda regularization factor (recommended: 0.01) * @param blocks level of parallelism to split computation into * @param alpha confidence parameter - * @since 0.8.1 */ + @Since("0.8.1") def trainImplicit( ratings: RDD[Rating], rank: Int, @@ -409,8 +409,8 @@ object ALS { * @param iterations number of iterations of ALS (recommended: 10-20) * @param lambda regularization factor (recommended: 0.01) * @param alpha confidence parameter - * @since 0.8.1 */ + @Since("0.8.1") def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double, alpha: Double) : MatrixFactorizationModel = { trainImplicit(ratings, rank, iterations, lambda, -1, alpha) @@ -427,8 +427,8 @@ object ALS { * @param ratings RDD of (userID, productID, rating) pairs * @param rank number of features to use * @param iterations number of iterations of ALS (recommended: 10-20) - * @since 0.8.1 */ + @Since("0.8.1") def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int) : MatrixFactorizationModel = { trainImplicit(ratings, rank, iterations, 0.01, -1, 1.0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 261ca9cef0..ba4cfdcd9f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -30,6 +30,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.{JavaPairRDD, JavaRDD} import org.apache.spark.mllib.linalg._ import 
org.apache.spark.mllib.rdd.MLPairRDDFunctions._ @@ -49,8 +50,8 @@ import org.apache.spark.storage.StorageLevel * the features computed for this user. * @param productFeatures RDD of tuples where each tuple represents the productId * and the features computed for this product. - * @since 0.8.0 */ +@Since("0.8.0") class MatrixFactorizationModel( val rank: Int, val userFeatures: RDD[(Int, Array[Double])], @@ -74,9 +75,8 @@ class MatrixFactorizationModel( } } - /** Predict the rating of one user for one product. - * @since 0.8.0 - */ + /** Predict the rating of one user for one product. */ + @Since("0.8.0") def predict(user: Int, product: Int): Double = { val userVector = userFeatures.lookup(user).head val productVector = productFeatures.lookup(product).head @@ -114,8 +114,8 @@ class MatrixFactorizationModel( * * @param usersProducts RDD of (user, product) pairs. * @return RDD of Ratings. - * @since 0.9.0 */ + @Since("0.9.0") def predict(usersProducts: RDD[(Int, Int)]): RDD[Rating] = { // Previously the partitions of ratings are only based on the given products. // So if the usersProducts given for prediction contains only few products or @@ -146,8 +146,8 @@ class MatrixFactorizationModel( /** * Java-friendly version of [[MatrixFactorizationModel.predict]]. - * @since 1.2.0 */ + @Since("1.2.0") def predict(usersProducts: JavaPairRDD[JavaInteger, JavaInteger]): JavaRDD[Rating] = { predict(usersProducts.rdd.asInstanceOf[RDD[(Int, Int)]]).toJavaRDD() } @@ -162,8 +162,8 @@ class MatrixFactorizationModel( * by score, decreasing. The first returned is the one predicted to be most strongly * recommended to the user. The score is an opaque value that indicates how strongly * recommended the product is. - * @since 1.1.0 */ + @Since("1.1.0") def recommendProducts(user: Int, num: Int): Array[Rating] = MatrixFactorizationModel.recommend(userFeatures.lookup(user).head, productFeatures, num) .map(t => Rating(user, t._1, t._2)) @@ -179,8 +179,8 @@ class MatrixFactorizationModel( * by score, decreasing. The first returned is the one predicted to be most strongly * recommended to the product. The score is an opaque value that indicates how strongly * recommended the user is. - * @since 1.1.0 */ + @Since("1.1.0") def recommendUsers(product: Int, num: Int): Array[Rating] = MatrixFactorizationModel.recommend(productFeatures.lookup(product).head, userFeatures, num) .map(t => Rating(t._1, product, t._2)) @@ -199,8 +199,8 @@ class MatrixFactorizationModel( * @param sc Spark context used to save model data. * @param path Path specifying the directory in which to save this model. * If the directory already exists, this method throws an exception. - * @since 1.3.0 */ + @Since("1.3.0") override def save(sc: SparkContext, path: String): Unit = { MatrixFactorizationModel.SaveLoadV1_0.save(this, path) } @@ -212,8 +212,8 @@ class MatrixFactorizationModel( * @return [(Int, Array[Rating])] objects, where every tuple contains a userID and an array of * rating objects which contains the same userId, recommended productID and a "score" in the * rating field. 
Semantics of score is same as recommendProducts API - * @since 1.4.0 */ + @Since("1.4.0") def recommendProductsForUsers(num: Int): RDD[(Int, Array[Rating])] = { MatrixFactorizationModel.recommendForAll(rank, userFeatures, productFeatures, num).map { case (user, top) => @@ -230,8 +230,8 @@ class MatrixFactorizationModel( * @return [(Int, Array[Rating])] objects, where every tuple contains a productID and an array * of rating objects which contains the recommended userId, same productID and a "score" in the * rating field. Semantics of score is same as recommendUsers API - * @since 1.4.0 */ + @Since("1.4.0") def recommendUsersForProducts(num: Int): RDD[(Int, Array[Rating])] = { MatrixFactorizationModel.recommendForAll(rank, productFeatures, userFeatures, num).map { case (product, top) => @@ -241,9 +241,7 @@ class MatrixFactorizationModel( } } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] { import org.apache.spark.mllib.util.Loader._ @@ -326,8 +324,8 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] { * @param sc Spark context used for loading model files. * @param path Path specifying the directory to which the model was saved. * @return Model instance - * @since 1.3.0 */ + @Since("1.3.0") override def load(sc: SparkContext, path: String): MatrixFactorizationModel = { val (loadedClassName, formatVersion, _) = loadMetadata(sc, path) val classNameV1_0 = SaveLoadV1_0.thisClassName diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 2980b94de3..509f6a2d16 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.regression -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.feature.StandardScaler import org.apache.spark.{Logging, SparkException} import org.apache.spark.rdd.RDD @@ -35,8 +35,8 @@ import org.apache.spark.storage.StorageLevel * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. * - * @since 0.8.0 */ +@Since("0.8.0") @DeveloperApi abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double) extends Serializable { @@ -56,8 +56,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction * - * @since 1.0.0 */ + @Since("1.0.0") def predict(testData: RDD[Vector]): RDD[Double] = { // A small optimization to avoid serializing the entire model. Only the weightsMatrix // and intercept is needed. @@ -76,8 +76,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * @param testData array representing a single data point * @return Double prediction from the trained model * - * @since 1.0.0 */ + @Since("1.0.0") def predict(testData: Vector): Double = { predictPoint(testData, weights, intercept) } @@ -95,8 +95,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM). 
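Tying together the ALS entry points and the MatrixFactorizationModel recommendation methods annotated above, a minimal end-to-end sketch (assuming `sc`; rank, iteration count, and lambda follow the values recommended in the docs):

{{{
import org.apache.spark.mllib.recommendation.{ALS, Rating}

val ratings = sc.parallelize(Seq(
  Rating(1, 10, 4.0), Rating(1, 20, 1.0),
  Rating(2, 10, 5.0), Rating(2, 30, 3.0)))

// rank = 8 latent features, 10 iterations, lambda = 0.01.
val model = ALS.train(ratings, 8, 10, 0.01)

val score = model.predict(2, 20)                 // one (user, product) pair
val top = model.recommendProducts(1, 2)          // top-2 products for user 1
val perUser = model.recommendProductsForUsers(2) // RDD[(Int, Array[Rating])]
}}}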
* This class should be extended with an Optimizer to create a new GLM. * - * @since 0.8.0 */ +@Since("0.8.0") @DeveloperApi abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] extends Logging with Serializable { @@ -106,8 +106,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * The optimizer to solve the problem. * - * @since 1.0.0 */ + @Since("1.0.0") def optimizer: Optimizer /** Whether to add intercept (default: false). */ @@ -143,8 +143,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * The dimension of training features. * - * @since 1.4.0 */ + @Since("1.4.0") def getNumFeatures: Int = this.numFeatures /** @@ -168,16 +168,16 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * Get if the algorithm uses addIntercept * - * @since 1.4.0 */ + @Since("1.4.0") def isAddIntercept: Boolean = this.addIntercept /** * Set if the algorithm should add an intercept. Default false. * We set the default to false because adding the intercept will cause memory allocation. * - * @since 0.8.0 */ + @Since("0.8.0") def setIntercept(addIntercept: Boolean): this.type = { this.addIntercept = addIntercept this @@ -186,8 +186,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * Set if the algorithm should validate data before training. Default true. * - * @since 0.8.0 */ + @Since("0.8.0") def setValidateData(validateData: Boolean): this.type = { this.validateData = validateData this @@ -197,8 +197,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] * Run the algorithm with the configured parameters on an input * RDD of LabeledPoint entries. * - * @since 0.8.0 */ + @Since("0.8.0") def run(input: RDD[LabeledPoint]): M = { if (numFeatures < 0) { numFeatures = input.map(_.features.size).first() @@ -231,8 +231,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] * Run the algorithm with the configured parameters on an input RDD * of LabeledPoint entries starting from the initial weights provided. * - * @since 1.0.0 */ + @Since("1.0.0") def run(input: RDD[LabeledPoint], initialWeights: Vector): M = { if (numFeatures < 0) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 8995591d9e..31ca7c2f20 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -29,7 +29,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.util.{Loader, Saveable} @@ -47,8 +47,8 @@ import org.apache.spark.sql.SQLContext * Results of isotonic regression and therefore monotone. * @param isotonic indicates whether this is isotonic or antitonic. * - * @since 1.3.0 */ +@Since("1.3.0") @Experimental class IsotonicRegressionModel ( val boundaries: Array[Double], @@ -64,8 +64,8 @@ class IsotonicRegressionModel ( /** * A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. 
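The GeneralizedLinearAlgorithm setters annotated above are chained on a concrete subclass; a sketch using LinearRegressionWithSGD, with `training` standing in for a hypothetical RDD[LabeledPoint]:

{{{
import org.apache.spark.mllib.regression.LinearRegressionWithSGD

val algo = new LinearRegressionWithSGD()
algo.setIntercept(true)     // fit an intercept term (default: false)
algo.setValidateData(true)  // validate input before training (default: true)
algo.optimizer
  .setNumIterations(100)
  .setStepSize(1.0)
val model = algo.run(training)
println(s"weights=${model.weights}, intercept=${model.intercept}")
}}}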
* - * @since 1.4.0 */ + @Since("1.4.0") def this(boundaries: java.lang.Iterable[Double], predictions: java.lang.Iterable[Double], isotonic: java.lang.Boolean) = { @@ -90,8 +90,8 @@ class IsotonicRegressionModel ( * @param testData Features to be labeled. * @return Predicted labels. * - * @since 1.3.0 */ + @Since("1.3.0") def predict(testData: RDD[Double]): RDD[Double] = { testData.map(predict) } @@ -103,8 +103,8 @@ class IsotonicRegressionModel ( * @param testData Features to be labeled. * @return Predicted labels. * - * @since 1.3.0 */ + @Since("1.3.0") def predict(testData: JavaDoubleRDD): JavaDoubleRDD = { JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]])) } @@ -125,8 +125,8 @@ class IsotonicRegressionModel ( * as piecewise linear function and interpolated value is returned. In case there are * multiple values with the same boundary then the same rules as in 2) are used. * - * @since 1.3.0 */ + @Since("1.3.0") def predict(testData: Double): Double = { def linearInterpolation(x1: Double, y1: Double, x2: Double, y2: Double, x: Double): Double = { @@ -160,9 +160,7 @@ class IsotonicRegressionModel ( /** A convenient method for boundaries called by the Python API. */ private[mllib] def predictionVector: Vector = Vectors.dense(predictions) - /** - * @since 1.4.0 - */ + @Since("1.4.0") override def save(sc: SparkContext, path: String): Unit = { IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic) } @@ -170,9 +168,7 @@ class IsotonicRegressionModel ( override protected def formatVersion: String = "1.0" } -/** - * @since 1.4.0 - */ +@Since("1.4.0") object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { import org.apache.spark.mllib.util.Loader._ @@ -219,8 +215,8 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { } /** - * @since 1.4.0 */ + @Since("1.4.0") override def load(sc: SparkContext, path: String): IsotonicRegressionModel = { implicit val formats = DefaultFormats val (loadedClassName, version, metadata) = loadMetadata(sc, path) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala index 8b51011eeb..f7fe1b7b21 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.regression import scala.beans.BeanInfo +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.{Vectors, Vector} import org.apache.spark.mllib.util.NumericParser import org.apache.spark.SparkException @@ -29,8 +30,8 @@ import org.apache.spark.SparkException * @param label Label for this data point. * @param features List of features for this data point. * - * @since 0.8.0 */ +@Since("0.8.0") @BeanInfo case class LabeledPoint(label: Double, features: Vector) { override def toString: String = { @@ -41,15 +42,15 @@ case class LabeledPoint(label: Double, features: Vector) { /** * Parser for [[org.apache.spark.mllib.regression.LabeledPoint]]. * - * @since 1.1.0 */ +@Since("1.1.0") object LabeledPoint { /** * Parses a string resulted from `LabeledPoint#toString` into * an [[org.apache.spark.mllib.regression.LabeledPoint]]. 
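The parse round-trip described above, in one line each way:

{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val p = LabeledPoint(1.0, Vectors.dense(2.5, 3.0))
val s = p.toString                 // "(1.0,[2.5,3.0])"
val parsed = LabeledPoint.parse(s) // recovers an equal LabeledPoint
}}}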
* - * @since 1.1.0 */ + @Since("1.1.0") def parse(s: String): LabeledPoint = { if (s.startsWith("(")) { NumericParser.parse(s) match { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index 03eb589b05..556411a366 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -18,6 +18,7 @@ package org.apache.spark.mllib.regression import org.apache.spark.SparkContext +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable @@ -31,8 +32,8 @@ import org.apache.spark.rdd.RDD * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. * - * @since 0.8.0 */ +@Since("0.8.0") class LassoModel ( override val weights: Vector, override val intercept: Double) @@ -46,9 +47,7 @@ class LassoModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } @@ -56,14 +55,10 @@ class LassoModel ( override protected def formatVersion: String = "1.0" } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object LassoModel extends Loader[LassoModel] { - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def load(sc: SparkContext, path: String): LassoModel = { val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) // Hard-code class name string in case it changes in the future @@ -118,8 +113,8 @@ class LassoWithSGD private ( /** * Top-level methods for calling Lasso. * - * @since 0.8.0 */ +@Since("0.8.0") object LassoWithSGD { /** @@ -137,8 +132,8 @@ object LassoWithSGD { * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. * - * @since 1.0.0 */ + @Since("1.0.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -162,8 +157,8 @@ object LassoWithSGD { * @param regParam Regularization parameter. * @param miniBatchFraction Fraction of data to be used per iteration. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -185,8 +180,8 @@ object LassoWithSGD { * @param numIterations Number of iterations of gradient descent to run. * @return a LassoModel which has the weights and offset from training. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -205,8 +200,8 @@ object LassoWithSGD { * @param numIterations Number of iterations of gradient descent to run. * @return a LassoModel which has the weights and offset from training. 
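For the Lasso entry points above, a minimal training sketch; `training` is a hypothetical RDD[LabeledPoint] and the parameter values echo the recommendations in the docs:

{{{
import org.apache.spark.mllib.regression.LassoWithSGD

// numIterations = 100, stepSize = 1.0, regParam = 0.01
val lassoModel = LassoWithSGD.train(training, 100, 1.0, 0.01)
val prediction = lassoModel.predict(testPoint) // testPoint: a hypothetical Vector
}}}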
* - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int): LassoModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index fb5c220daa..00ab06e3ba 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -18,6 +18,7 @@ package org.apache.spark.mllib.regression import org.apache.spark.SparkContext +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable @@ -31,8 +32,8 @@ import org.apache.spark.rdd.RDD * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. * - * @since 0.8.0 */ +@Since("0.8.0") class LinearRegressionModel ( override val weights: Vector, override val intercept: Double) @@ -46,9 +47,7 @@ class LinearRegressionModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } @@ -56,14 +55,10 @@ class LinearRegressionModel ( override protected def formatVersion: String = "1.0" } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object LinearRegressionModel extends Loader[LinearRegressionModel] { - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def load(sc: SparkContext, path: String): LinearRegressionModel = { val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) // Hard-code class name string in case it changes in the future @@ -117,8 +112,8 @@ class LinearRegressionWithSGD private[mllib] ( /** * Top-level methods for calling LinearRegression. * - * @since 0.8.0 */ +@Since("0.8.0") object LinearRegressionWithSGD { /** @@ -135,8 +130,8 @@ object LinearRegressionWithSGD { * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. * - * @since 1.0.0 */ + @Since("1.0.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -158,8 +153,8 @@ object LinearRegressionWithSGD { * @param stepSize Step size to be used for each iteration of gradient descent. * @param miniBatchFraction Fraction of data to be used per iteration. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -179,8 +174,8 @@ object LinearRegressionWithSGD { * @param numIterations Number of iterations of gradient descent to run. * @return a LinearRegressionModel which has the weights and offset from training. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -198,8 +193,8 @@ object LinearRegressionWithSGD { * @param numIterations Number of iterations of gradient descent to run. * @return a LinearRegressionModel which has the weights and offset from training. 
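The save/load pair tagged @Since("1.3.0") recurs across these model classes and works the same way everywhere; a sketch for LinearRegressionModel, with a hypothetical path:

{{{
import org.apache.spark.mllib.regression.LinearRegressionModel

model.save(sc, "/tmp/linear-regression-model")
val reloaded = LinearRegressionModel.load(sc, "/tmp/linear-regression-model")
}}}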
* - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int): LinearRegressionModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala index b097fd38fd..0e72d6591c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala @@ -19,14 +19,12 @@ package org.apache.spark.mllib.regression import org.json4s.{DefaultFormats, JValue} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.rdd.RDD -/** - * @since 0.8.0 - */ +@Since("0.8.0") @Experimental trait RegressionModel extends Serializable { /** @@ -35,8 +33,8 @@ trait RegressionModel extends Serializable { * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction * - * @since 1.0.0 */ + @Since("1.0.0") def predict(testData: RDD[Vector]): RDD[Double] /** @@ -45,8 +43,8 @@ trait RegressionModel extends Serializable { * @param testData array representing a single data point * @return Double prediction from the trained model * - * @since 1.0.0 */ + @Since("1.0.0") def predict(testData: Vector): Double /** @@ -54,8 +52,8 @@ trait RegressionModel extends Serializable { * @param testData JavaRDD representing data points to be predicted * @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction * - * @since 1.0.0 */ + @Since("1.0.0") def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] = predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]] } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 5bced6b4b7..21a791d98b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -18,6 +18,7 @@ package org.apache.spark.mllib.regression import org.apache.spark.SparkContext +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable @@ -32,8 +33,8 @@ import org.apache.spark.rdd.RDD * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. 
* - * @since 0.8.0 */ +@Since("0.8.0") class RidgeRegressionModel ( override val weights: Vector, override val intercept: Double) @@ -47,9 +48,7 @@ class RidgeRegressionModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } @@ -57,14 +56,10 @@ class RidgeRegressionModel ( override protected def formatVersion: String = "1.0" } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object RidgeRegressionModel extends Loader[RidgeRegressionModel] { - /** - * @since 1.3.0 - */ + @Since("1.3.0") override def load(sc: SparkContext, path: String): RidgeRegressionModel = { val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) // Hard-code class name string in case it changes in the future @@ -120,8 +115,8 @@ class RidgeRegressionWithSGD private ( /** * Top-level methods for calling RidgeRegression. * - * @since 0.8.0 */ +@Since("0.8.0") object RidgeRegressionWithSGD { /** @@ -138,8 +133,8 @@ object RidgeRegressionWithSGD { * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -162,8 +157,8 @@ object RidgeRegressionWithSGD { * @param regParam Regularization parameter. * @param miniBatchFraction Fraction of data to be used per iteration. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -184,8 +179,8 @@ object RidgeRegressionWithSGD { * @param numIterations Number of iterations of gradient descent to run. * @return a RidgeRegressionModel which has the weights and offset from training. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int, @@ -203,8 +198,8 @@ object RidgeRegressionWithSGD { * @param numIterations Number of iterations of gradient descent to run. * @return a RidgeRegressionModel which has the weights and offset from training. * - * @since 0.8.0 */ + @Since("0.8.0") def train( input: RDD[LabeledPoint], numIterations: Int): RidgeRegressionModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala index a2ab95c474..cd3ed8a154 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.regression import scala.reflect.ClassTag import org.apache.spark.Logging -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream} @@ -54,8 +54,8 @@ import org.apache.spark.streaming.dstream.DStream * the model using each of the different sources, in sequence. * * - * @since 1.1.0 */ +@Since("1.1.0") @DeveloperApi abstract class StreamingLinearAlgorithm[ M <: GeneralizedLinearModel, @@ -70,8 +70,8 @@ abstract class StreamingLinearAlgorithm[ /** * Return the latest model. 
* - * @since 1.1.0 */ + @Since("1.1.0") def latestModel(): M = { model.get } @@ -84,8 +84,8 @@ abstract class StreamingLinearAlgorithm[ * * @param data DStream containing labeled data * - * @since 1.3.0 */ + @Since("1.3.0") def trainOn(data: DStream[LabeledPoint]): Unit = { if (model.isEmpty) { throw new IllegalArgumentException("Model must be initialized before starting training.") @@ -106,8 +106,8 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `trainOn`. * - * @since 1.3.0 */ + @Since("1.3.0") def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream) /** @@ -116,8 +116,8 @@ abstract class StreamingLinearAlgorithm[ * @param data DStream containing feature vectors * @return DStream containing predictions * - * @since 1.1.0 */ + @Since("1.1.0") def predictOn(data: DStream[Vector]): DStream[Double] = { if (model.isEmpty) { throw new IllegalArgumentException("Model must be initialized before starting prediction.") @@ -128,8 +128,8 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `predictOn`. * - * @since 1.1.0 */ + @Since("1.1.0") def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = { JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]]) } @@ -140,8 +140,8 @@ abstract class StreamingLinearAlgorithm[ * @tparam K key type * @return DStream containing the input keys and the predictions as values * - * @since 1.1.0 */ + @Since("1.1.0") def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Double)] = { if (model.isEmpty) { throw new IllegalArgumentException("Model must be initialized before starting prediction") @@ -153,8 +153,8 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `predictOnValues`. * - * @since 1.3.0 */ + @Since("1.3.0") def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = { implicit val tag = fakeClassTag[K] JavaPairDStream.fromPairDStream( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala index 93a6753efd..4a856f7f34 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat import com.github.fommil.netlib.BLAS.{getInstance => blas} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD @@ -37,8 +37,8 @@ import org.apache.spark.rdd.RDD * .setBandwidth(3.0) * val densities = kd.estimate(Array(-1.0, 2.0, 5.0)) * }}} - * @since 1.4.0 */ +@Since("1.4.0") @Experimental class KernelDensity extends Serializable { @@ -52,8 +52,8 @@ class KernelDensity extends Serializable { /** * Sets the bandwidth (standard deviation) of the Gaussian kernel (default: `1.0`). - * @since 1.4.0 */ + @Since("1.4.0") def setBandwidth(bandwidth: Double): this.type = { require(bandwidth > 0, s"Bandwidth must be positive, but got $bandwidth.") this.bandwidth = bandwidth @@ -62,8 +62,8 @@ class KernelDensity extends Serializable { /** * Sets the sample to use for density estimation. - * @since 1.4.0 */ + @Since("1.4.0") def setSample(sample: RDD[Double]): this.type = { this.sample = sample this @@ -71,8 +71,8 @@ class KernelDensity extends Serializable { /** * Sets the sample to use for density estimation (for Java users). 
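A sketch of the streaming train/predict cycle annotated above, using StreamingLinearRegressionWithSGD; `trainingStream` (a DStream[LabeledPoint]) and `keyedTestStream` (a DStream[(K, Vector)]) are hypothetical:

{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD

val algo = new StreamingLinearRegressionWithSGD()
  .setInitialWeights(Vectors.zeros(3)) // the model must be initialized before trainOn

algo.trainOn(trainingStream)
val predictions = algo.predictOnValues(keyedTestStream) // DStream[(K, Double)]
}}}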
- * @since 1.4.0 */ + @Since("1.4.0") def setSample(sample: JavaRDD[java.lang.Double]): this.type = { this.sample = sample.rdd.asInstanceOf[RDD[Double]] this @@ -80,8 +80,8 @@ class KernelDensity extends Serializable { /** * Estimates probability density function at the given array of points. - * @since 1.4.0 */ + @Since("1.4.0") def estimate(points: Array[Double]): Array[Double] = { val sample = this.sample val bandwidth = this.bandwidth diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index 64e4be0ebb..51b713e263 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.stat -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.{Vectors, Vector} /** @@ -33,8 +33,8 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} * Reference: [[http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance variance-wiki]] * Zero elements (including explicit zero values) are skipped when calling add(), * to have time complexity O(nnz) instead of O(n) for each column. - * @since 1.1.0 */ +@Since("1.1.0") @DeveloperApi class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with Serializable { @@ -53,8 +53,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S * * @param sample The sample in dense/sparse vector format to be added into this summarizer. * @return This MultivariateOnlineSummarizer object. - * @since 1.1.0 */ + @Since("1.1.0") def add(sample: Vector): this.type = { if (n == 0) { require(sample.size > 0, s"Vector should have dimension larger than zero.") @@ -109,8 +109,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S * * @param other The other MultivariateOnlineSummarizer to be merged. * @return This MultivariateOnlineSummarizer object. - * @since 1.1.0 */ + @Since("1.1.0") def merge(other: MultivariateOnlineSummarizer): this.type = { if (this.totalCnt != 0 && other.totalCnt != 0) { require(n == other.n, s"Dimensions mismatch when merging with another summarizer. " + @@ -155,8 +155,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Sample mean of each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def mean: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -172,8 +172,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Sample variance of each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def variance: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -199,15 +199,15 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Sample size. * - * @since 1.1.0 */ + @Since("1.1.0") override def count: Long = totalCnt /** * Number of nonzero elements in each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def numNonzeros: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -217,8 +217,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Maximum value of each dimension. 
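The add/merge pair above is built for distributed aggregation; this sketch mirrors how RowMatrix.computeColumnSummaryStatistics drives it, with `data` standing in for an RDD[Vector]:

{{{
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer

val summary = data.treeAggregate(new MultivariateOnlineSummarizer)(
  (agg, sample) => agg.add(sample), // fold one sample into a partition's summarizer
  (a, b) => a.merge(b))             // combine per-partition summarizers
println(summary.mean)
println(summary.variance)
println(summary.count)
}}}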
* - * @since 1.1.0 */ + @Since("1.1.0") override def max: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -233,8 +233,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * Minimum value of each dimension. * - * @since 1.1.0 */ + @Since("1.1.0") override def min: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -249,8 +249,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * L2 (Euclidian) norm of each dimension. * - * @since 1.2.0 */ + @Since("1.2.0") override def normL2: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") @@ -268,8 +268,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S /** * L1 norm of each dimension. * - * @since 1.2.0 */ + @Since("1.2.0") override def normL1: Vector = { require(totalCnt > 0, s"Nothing has been added to this summarizer.") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala index 3bb49f1228..39a16fb743 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala @@ -17,59 +17,60 @@ package org.apache.spark.mllib.stat +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.Vector /** * Trait for multivariate statistical summary of a data matrix. - * @since 1.0.0 */ +@Since("1.0.0") trait MultivariateStatisticalSummary { /** * Sample mean vector. - * @since 1.0.0 */ + @Since("1.0.0") def mean: Vector /** * Sample variance vector. Should return a zero vector if the sample size is 1. - * @since 1.0.0 */ + @Since("1.0.0") def variance: Vector /** * Sample size. - * @since 1.0.0 */ + @Since("1.0.0") def count: Long /** * Number of nonzero elements (including explicitly presented zero values) in each column. - * @since 1.0.0 */ + @Since("1.0.0") def numNonzeros: Vector /** * Maximum value of each column. - * @since 1.0.0 */ + @Since("1.0.0") def max: Vector /** * Minimum value of each column. - * @since 1.0.0 */ + @Since("1.0.0") def min: Vector /** * Euclidean magnitude of each column - * @since 1.2.0 */ + @Since("1.2.0") def normL2: Vector /** * L1 norm of each column - * @since 1.2.0 */ + @Since("1.2.0") def normL1: Vector } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala index ef8d786070..84d64a5bfb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat import scala.annotation.varargs -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.{JavaRDD, JavaDoubleRDD} import org.apache.spark.mllib.linalg.distributed.RowMatrix import org.apache.spark.mllib.linalg.{Matrix, Vector} @@ -32,8 +32,8 @@ import org.apache.spark.rdd.RDD /** * :: Experimental :: * API for statistical functions in MLlib. - * @since 1.1.0 */ +@Since("1.1.0") @Experimental object Statistics { @@ -42,8 +42,8 @@ object Statistics { * * @param X an RDD[Vector] for which column-wise summary statistics are to be computed. 
* @return [[MultivariateStatisticalSummary]] object containing column-wise summary statistics. - * @since 1.1.0 */ + @Since("1.1.0") def colStats(X: RDD[Vector]): MultivariateStatisticalSummary = { new RowMatrix(X).computeColumnSummaryStatistics() } @@ -54,8 +54,8 @@ object Statistics { * * @param X an RDD[Vector] for which the correlation matrix is to be computed. * @return Pearson correlation matrix comparing columns in X. - * @since 1.1.0 */ + @Since("1.1.0") def corr(X: RDD[Vector]): Matrix = Correlations.corrMatrix(X) /** @@ -71,8 +71,8 @@ object Statistics { * @param method String specifying the method to use for computing correlation. * Supported: `pearson` (default), `spearman` * @return Correlation matrix comparing columns in X. - * @since 1.1.0 */ + @Since("1.1.0") def corr(X: RDD[Vector], method: String): Matrix = Correlations.corrMatrix(X, method) /** @@ -85,14 +85,14 @@ object Statistics { * @param x RDD[Double] of the same cardinality as y. * @param y RDD[Double] of the same cardinality as x. * @return A Double containing the Pearson correlation between the two input RDD[Double]s - * @since 1.1.0 */ + @Since("1.1.0") def corr(x: RDD[Double], y: RDD[Double]): Double = Correlations.corr(x, y) /** * Java-friendly version of [[corr()]] - * @since 1.4.1 */ + @Since("1.4.1") def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double]): Double = corr(x.rdd.asInstanceOf[RDD[Double]], y.rdd.asInstanceOf[RDD[Double]]) @@ -109,14 +109,14 @@ object Statistics { * Supported: `pearson` (default), `spearman` * @return A Double containing the correlation between the two input RDD[Double]s using the * specified method. - * @since 1.1.0 */ + @Since("1.1.0") def corr(x: RDD[Double], y: RDD[Double], method: String): Double = Correlations.corr(x, y, method) /** * Java-friendly version of [[corr()]] - * @since 1.4.1 */ + @Since("1.4.1") def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double], method: String): Double = corr(x.rdd.asInstanceOf[RDD[Double]], y.rdd.asInstanceOf[RDD[Double]], method) @@ -133,8 +133,8 @@ object Statistics { * `expected` is rescaled if the `expected` sum differs from the `observed` sum. * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. - * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(observed: Vector, expected: Vector): ChiSqTestResult = { ChiSqTest.chiSquared(observed, expected) } @@ -148,8 +148,8 @@ object Statistics { * @param observed Vector containing the observed categorical counts/relative frequencies. * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. - * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(observed: Vector): ChiSqTestResult = ChiSqTest.chiSquared(observed) /** @@ -159,8 +159,8 @@ object Statistics { * @param observed The contingency matrix (containing either counts or relative frequencies). * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. - * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(observed: Matrix): ChiSqTestResult = ChiSqTest.chiSquaredMatrix(observed) /** @@ -172,13 +172,14 @@ object Statistics { * Real-valued features will be treated as categorical for each distinct value. * @return an array containing the ChiSquaredTestResult for every feature against the label. * The order of the elements in the returned array reflects the order of input features. 
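A combined sketch of the Statistics entry points annotated above (assuming `sc`; the observations are illustrative):

{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.Statistics

val observations = sc.parallelize(Seq(
  Vectors.dense(1.0, 10.0),
  Vectors.dense(2.0, 20.0),
  Vectors.dense(3.0, 31.0)))

val colStats = Statistics.colStats(observations)         // mean, variance, ...
val pearson = Statistics.corr(observations)              // Pearson matrix (default)
val spearman = Statistics.corr(observations, "spearman")

// Goodness of fit against a uniform expected distribution (the default).
val testResult = Statistics.chiSqTest(Vectors.dense(13.0, 47.0, 40.0))
println(testResult.pValue)
}}}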
- * @since 1.1.0 */ + @Since("1.1.0") def chiSqTest(data: RDD[LabeledPoint]): Array[ChiSqTestResult] = { ChiSqTest.chiSquaredFeatures(data) } /** Java-friendly version of [[chiSqTest()]] */ + @Since("1.5.0") def chiSqTest(data: JavaRDD[LabeledPoint]): Array[ChiSqTestResult] = chiSqTest(data.rdd) /** @@ -194,6 +195,7 @@ object Statistics { * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test * statistic, p-value, and null hypothesis. */ + @Since("1.5.0") def kolmogorovSmirnovTest(data: RDD[Double], cdf: Double => Double) : KolmogorovSmirnovTestResult = { KolmogorovSmirnovTest.testOneSample(data, cdf) @@ -210,6 +212,7 @@ object Statistics { * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test * statistic, p-value, and null hypothesis. */ + @Since("1.5.0") @varargs def kolmogorovSmirnovTest(data: RDD[Double], distName: String, params: Double*) : KolmogorovSmirnovTestResult = { @@ -217,6 +220,7 @@ object Statistics { } /** Java-friendly version of [[kolmogorovSmirnovTest()]] */ + @Since("1.5.0") @varargs def kolmogorovSmirnovTest( data: JavaDoubleRDD, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala index 9aa7763d78..bd4d81390b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat.distribution import breeze.linalg.{DenseVector => DBV, DenseMatrix => DBM, diag, max, eigSym, Vector => BV} -import org.apache.spark.annotation.DeveloperApi; +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.{Vectors, Vector, Matrices, Matrix} import org.apache.spark.mllib.util.MLUtils @@ -32,8 +32,8 @@ import org.apache.spark.mllib.util.MLUtils * * @param mu The mean vector of the distribution * @param sigma The covariance matrix of the distribution - * @since 1.3.0 */ +@Since("1.3.0") @DeveloperApi class MultivariateGaussian ( val mu: Vector, @@ -62,15 +62,15 @@ class MultivariateGaussian ( private val (rootSigmaInv: DBM[Double], u: Double) = calculateCovarianceConstants /** Returns density of this multivariate Gaussian at given point, x - * @since 1.3.0 */ + @Since("1.3.0") def pdf(x: Vector): Double = { pdf(x.toBreeze) } /** Returns the log-density of this multivariate Gaussian at given point, x - * @since 1.3.0 */ + @Since("1.3.0") def logpdf(x: Vector): Double = { logpdf(x.toBreeze) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index e5200b86fd..972841015d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -22,7 +22,7 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuilder import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.RandomForest.NodeIndexInfo @@ -43,8 +43,8 @@ import org.apache.spark.util.random.XORShiftRandom * @param strategy The configuration parameters for the tree algorithm which 
specify the type * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class DecisionTree (private val strategy: Strategy) extends Serializable with Logging { @@ -54,8 +54,8 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo * Method to train a decision tree model over an RDD * @param input Training data: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] * @return DecisionTreeModel that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def run(input: RDD[LabeledPoint]): DecisionTreeModel = { // Note: random seed will not be used since numTrees = 1. val rf = new RandomForest(strategy, numTrees = 1, featureSubsetStrategy = "all", seed = 0) @@ -64,9 +64,7 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo } } -/** - * @since 1.0.0 - */ +@Since("1.0.0") object DecisionTree extends Serializable with Logging { /** @@ -84,8 +82,8 @@ object DecisionTree extends Serializable with Logging { * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. * @return DecisionTreeModel that can be used for prediction - * @since 1.0.0 - */ + */ + @Since("1.0.0") def train(input: RDD[LabeledPoint], strategy: Strategy): DecisionTreeModel = { new DecisionTree(strategy).run(input) } @@ -106,8 +104,8 @@ object DecisionTree extends Serializable with Logging { * @param maxDepth Maximum depth of the tree. * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. * @return DecisionTreeModel that can be used for prediction - * @since 1.0.0 */ + @Since("1.0.0") def train( input: RDD[LabeledPoint], algo: Algo, @@ -134,8 +132,8 @@ object DecisionTree extends Serializable with Logging { * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. * @param numClasses number of classes for classification. Default value of 2. * @return DecisionTreeModel that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def train( input: RDD[LabeledPoint], algo: Algo, @@ -168,8 +166,8 @@ object DecisionTree extends Serializable with Logging { * E.g., an entry (n -> k) indicates that feature n is categorical * with k categories indexed from 0: {0, 1, ..., k-1}. 
* @return DecisionTreeModel that can be used for prediction - * @since 1.0.0 */ + @Since("1.0.0") def train( input: RDD[LabeledPoint], algo: Algo, @@ -201,8 +199,8 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction - * @since 1.1.0 */ + @Since("1.1.0") def trainClassifier( input: RDD[LabeledPoint], numClasses: Int, @@ -217,8 +215,8 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] - * @since 1.1.0 */ + @Since("1.1.0") def trainClassifier( input: JavaRDD[LabeledPoint], numClasses: Int, @@ -247,8 +245,8 @@ object DecisionTree extends Serializable with Logging { * @param maxBins maximum number of bins used for splitting features * (suggested value: 32) * @return DecisionTreeModel that can be used for prediction - * @since 1.1.0 */ + @Since("1.1.0") def trainRegressor( input: RDD[LabeledPoint], categoricalFeaturesInfo: Map[Int, Int], @@ -261,8 +259,8 @@ object DecisionTree extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] - * @since 1.1.0 */ + @Since("1.1.0") def trainRegressor( input: JavaRDD[LabeledPoint], categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index 1436170986..e750408600 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.impl.PeriodicRDDCheckpointer import org.apache.spark.mllib.regression.LabeledPoint @@ -48,8 +48,8 @@ import org.apache.spark.storage.StorageLevel * for other loss functions. * * @param boostingStrategy Parameters for the gradient boosting algorithm. - * @since 1.2.0 */ +@Since("1.2.0") @Experimental class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) extends Serializable with Logging { @@ -58,8 +58,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * Method to train a gradient boosting model * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return a gradient boosted trees model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = { val algo = boostingStrategy.treeStrategy.algo algo match { @@ -76,8 +76,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#run]]. 
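A sketch of the trainClassifier entry point above, with `training` a hypothetical RDD[LabeledPoint] and the suggested maxDepth/maxBins values from the docs:

{{{
import org.apache.spark.mllib.tree.DecisionTree

val dtModel = DecisionTree.trainClassifier(
  training,
  numClasses = 2,
  categoricalFeaturesInfo = Map.empty[Int, Int], // all features continuous
  impurity = "gini",
  maxDepth = 5,
  maxBins = 32)
val label = dtModel.predict(testPoint) // testPoint: a hypothetical Vector
}}}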
- * @since 1.2.0 */ + @Since("1.2.0") def run(input: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = { run(input.rdd) } @@ -91,8 +91,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) * E.g., these two datasets could be created from an original dataset * by using [[org.apache.spark.rdd.RDD.randomSplit()]] * @return a gradient boosted trees model that can be used for prediction - * @since 1.4.0 */ + @Since("1.4.0") def runWithValidation( input: RDD[LabeledPoint], validationInput: RDD[LabeledPoint]): GradientBoostedTreesModel = { @@ -115,8 +115,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#runWithValidation]]. - * @since 1.4.0 */ + @Since("1.4.0") def runWithValidation( input: JavaRDD[LabeledPoint], validationInput: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = { @@ -124,9 +124,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) } } -/** - * @since 1.2.0 - */ +@Since("1.2.0") object GradientBoostedTrees extends Logging { /** @@ -137,8 +135,8 @@ object GradientBoostedTrees extends Logging { * For regression, labels are real numbers. * @param boostingStrategy Configuration options for the boosting algorithm. * @return a gradient boosted trees model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def train( input: RDD[LabeledPoint], boostingStrategy: BoostingStrategy): GradientBoostedTreesModel = { @@ -147,8 +145,8 @@ object GradientBoostedTrees extends Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees$#train]] - * @since 1.2.0 */ + @Since("1.2.0") def train( input: JavaRDD[LabeledPoint], boostingStrategy: BoostingStrategy): GradientBoostedTreesModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 9f3230656a..63a902f3eb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import scala.collection.JavaConverters._ import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.configuration.Strategy @@ -260,9 +260,7 @@ private class RandomForest ( } -/** - * @since 1.2.0 - */ +@Since("1.2.0") object RandomForest extends Serializable with Logging { /** @@ -279,8 +277,8 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "sqrt". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainClassifier( input: RDD[LabeledPoint], strategy: Strategy, @@ -317,8 +315,8 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. 
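The `runWithValidation` variant annotated above consumes a train/validation pair, typically produced by `randomSplit` as its scaladoc suggests; a sketch, assuming `data` is an existing `RDD[LabeledPoint]`:

```scala
import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.configuration.BoostingStrategy

val Array(trainingData, validationData) = data.randomSplit(Array(0.8, 0.2))

val boostingStrategy = BoostingStrategy.defaultParams("Regression")
boostingStrategy.numIterations = 10 // keep the toy run short

// May stop before numIterations once the validation error improves
// by less than validationTol between iterations.
val model = new GradientBoostedTrees(boostingStrategy)
  .runWithValidation(trainingData, validationData)
```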
* @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainClassifier( input: RDD[LabeledPoint], numClasses: Int, @@ -337,8 +335,8 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainClassifier]] - * @since 1.2.0 */ + @Since("1.2.0") def trainClassifier( input: JavaRDD[LabeledPoint], numClasses: Int, @@ -368,8 +366,8 @@ object RandomForest extends Serializable with Logging { * if numTrees > 1 (forest) set to "onethird". * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainRegressor( input: RDD[LabeledPoint], strategy: Strategy, @@ -405,8 +403,8 @@ object RandomForest extends Serializable with Logging { * (suggested value: 100) * @param seed Random seed for bootstrapping and choosing feature subsets. * @return a random forest model that can be used for prediction - * @since 1.2.0 */ + @Since("1.2.0") def trainRegressor( input: RDD[LabeledPoint], categoricalFeaturesInfo: Map[Int, Int], @@ -424,8 +422,8 @@ object RandomForest extends Serializable with Logging { /** * Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainRegressor]] - * @since 1.2.0 */ + @Since("1.2.0") def trainRegressor( input: JavaRDD[LabeledPoint], categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer], @@ -442,8 +440,8 @@ object RandomForest extends Serializable with Logging { /** * List of supported feature subset sampling strategies. - * @since 1.2.0 */ + @Since("1.2.0") val supportedFeatureSubsetStrategies: Array[String] = Array("auto", "all", "sqrt", "log2", "onethird") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index d9a49aa71f..8301ad1608 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} /** * :: Experimental :: * Enum to select the algorithm for the decision tree - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Algo extends Enumeration { type Algo = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index 88e5f57e9a..7c56998197 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.tree.configuration import scala.beans.BeanProperty -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} @@ -38,8 +38,8 @@ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} * validation input between two iterations is less than the validationTol * then stop. Ignored when * [[org.apache.spark.mllib.tree.GradientBoostedTrees.run()]] is used. 
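The `featureSubsetStrategy` values annotated above come from the `supportedFeatureSubsetStrategies` list; a sketch of a classifier call, with `data` again an assumed `RDD[LabeledPoint]`:

```scala
import org.apache.spark.mllib.tree.RandomForest

// "auto" resolves to "sqrt" here because numTrees > 1, as documented above.
val forest = RandomForest.trainClassifier(
  data, numClasses = 2, categoricalFeaturesInfo = Map.empty[Int, Int],
  numTrees = 100, featureSubsetStrategy = "auto",
  impurity = "gini", maxDepth = 5, maxBins = 32, seed = 42)
```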
- * @since 1.2.0 */ +@Since("1.2.0") @Experimental case class BoostingStrategy( // Required boosting parameters @@ -71,9 +71,7 @@ case class BoostingStrategy( } } -/** - * @since 1.2.0 - */ +@Since("1.2.0") @Experimental object BoostingStrategy { @@ -81,8 +79,8 @@ object BoostingStrategy { * Returns default configuration for the boosting algorithm * @param algo Learning goal. Supported: "Classification" or "Regression" * @return Configuration for boosting algorithm - * @since 1.2.0 */ + @Since("1.2.0") def defaultParams(algo: String): BoostingStrategy = { defaultParams(Algo.fromString(algo)) } @@ -93,8 +91,8 @@ object BoostingStrategy { * [[org.apache.spark.mllib.tree.configuration.Algo.Classification]], * [[org.apache.spark.mllib.tree.configuration.Algo.Regression]] * @return Configuration for boosting algorithm - * @since 1.3.0 */ + @Since("1.3.0") def defaultParams(algo: Algo): BoostingStrategy = { val treeStrategy = Strategy.defaultStrategy(algo) treeStrategy.maxDepth = 3 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index 0684cafa48..bb7c7ee4f9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} /** * :: Experimental :: * Enum to describe whether a feature is "continuous" or "categorical" - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object FeatureType extends Enumeration { type FeatureType = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index 2daa63c4d2..904e42deeb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} /** * :: Experimental :: * Enum for selecting the quantile calculation strategy - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object QuantileStrategy extends Enumeration { type QuantileStrategy = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 7ae25a88bf..a58f01ba85 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.tree.configuration import scala.beans.BeanProperty import scala.collection.JavaConverters._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.mllib.tree.impurity.{Variance, Entropy, Gini, Impurity} import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ @@ -66,8 +66,8 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ * E.g. 10 means that the cache will get checkpointed every 10 updates. 
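The two `defaultParams` overloads above differ only in how the algorithm is named; a sketch of the `Algo`-typed one (the `maxDepth == 3` check mirrors the default visible in the hunk):

```scala
import org.apache.spark.mllib.tree.configuration.{Algo, BoostingStrategy}

val params = BoostingStrategy.defaultParams(Algo.Classification)
assert(params.treeStrategy.maxDepth == 3) // default set by defaultParams
params.treeStrategy.maxDepth = 5          // adjust afterwards as needed
```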
If * the checkpoint directory is not set in * [[org.apache.spark.SparkContext]], this setting is ignored. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class Strategy ( @BeanProperty var algo: Algo, @@ -85,23 +85,23 @@ class Strategy ( @BeanProperty var checkpointInterval: Int = 10) extends Serializable { /** - * @since 1.2.0 */ + @Since("1.2.0") def isMulticlassClassification: Boolean = { algo == Classification && numClasses > 2 } /** - * @since 1.2.0 */ + @Since("1.2.0") def isMulticlassWithCategoricalFeatures: Boolean = { isMulticlassClassification && (categoricalFeaturesInfo.size > 0) } /** * Java-friendly constructor for [[org.apache.spark.mllib.tree.configuration.Strategy]] - * @since 1.1.0 */ + @Since("1.1.0") def this( algo: Algo, impurity: Impurity, @@ -115,8 +115,8 @@ class Strategy ( /** * Sets Algorithm using a String. - * @since 1.2.0 */ + @Since("1.2.0") def setAlgo(algo: String): Unit = algo match { case "Classification" => setAlgo(Classification) case "Regression" => setAlgo(Regression) @@ -124,8 +124,8 @@ class Strategy ( /** * Sets categoricalFeaturesInfo using a Java Map. - * @since 1.2.0 */ + @Since("1.2.0") def setCategoricalFeaturesInfo( categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]): Unit = { this.categoricalFeaturesInfo = @@ -174,8 +174,8 @@ class Strategy ( /** * Returns a shallow copy of this instance. - * @since 1.2.0 */ + @Since("1.2.0") def copy: Strategy = { new Strategy(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain, @@ -183,17 +183,15 @@ class Strategy ( } } -/** - * @since 1.2.0 - */ +@Since("1.2.0") @Experimental object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo "Classification" or "Regression" - * @since 1.2.0 */ + @Since("1.2.0") def defaultStrategy(algo: String): Strategy = { defaultStrategy(Algo.fromString(algo)) } @@ -201,8 +199,8 @@ object Strategy { /** * Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]] * @param algo Algo.Classification or Algo.Regression - * @since 1.3.0 */ + @Since("1.3.0") def defaultStrategy(algo: Algo): Strategy = algo match { case Algo.Classification => new Strategy(algo = Classification, impurity = Gini, maxDepth = 10, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 0b6c7266de..73df6b054a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -17,14 +17,14 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: * Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. 
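The `Strategy` setters annotated above exist for Java interoperability; a sketch combining them with `defaultStrategy` (the map contents are illustrative):

```scala
import org.apache.spark.mllib.tree.configuration.Strategy

val strategy = Strategy.defaultStrategy("Classification")
strategy.setAlgo("Classification") // String-based setter from the hunk above

// Java-friendly categorical-feature info: feature 0 has 3 categories.
val info = new java.util.HashMap[java.lang.Integer, java.lang.Integer]()
info.put(0, 3)
strategy.setCategoricalFeaturesInfo(info)

val tuned = strategy.copy // shallow copy, as documented above
```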
- * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Entropy extends Impurity { @@ -36,8 +36,8 @@ object Entropy extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { if (totalCount == 0) { @@ -64,8 +64,8 @@ object Entropy extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new UnsupportedOperationException("Entropy.calculate") @@ -73,8 +73,8 @@ object Entropy extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. - * @since 1.1.0 */ + @Since("1.1.0") def instance: this.type = this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index 3b0be42883..f21845b21a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -17,15 +17,15 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during binary classification. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Gini extends Impurity { @@ -35,8 +35,8 @@ object Gini extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = { if (totalCount == 0) { @@ -60,8 +60,8 @@ object Gini extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new UnsupportedOperationException("Gini.calculate") @@ -69,8 +69,8 @@ object Gini extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. 
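The `calculate(counts, totalCount)` overloads annotated above can be exercised directly (they are `DeveloperApi`); for a perfectly mixed binary node the expected values follow from the entropy and Gini definitions linked in the scaladoc:

```scala
import org.apache.spark.mllib.tree.impurity.{Entropy, Gini}

val counts = Array(5.0, 5.0)    // five examples of each label
Entropy.calculate(counts, 10.0) // 1.0: maximal entropy for two classes
Gini.calculate(counts, 10.0)    // 0.5: 1 - 0.5^2 - 0.5^2
```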
- * @since 1.1.0 */ + @Since("1.1.0") def instance: this.type = this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index dd29740005..4637dcceea 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: @@ -25,8 +25,8 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} * This trait is used for * (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]] * (b) calculating impurity values from sufficient statistics. - * @since 1.0.0 */ +@Since("1.0.0") @Experimental trait Impurity extends Serializable { @@ -36,8 +36,8 @@ trait Impurity extends Serializable { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi def calculate(counts: Array[Double], totalCount: Double): Double @@ -48,8 +48,8 @@ trait Impurity extends Serializable { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi def calculate(count: Double, sum: Double, sumSquares: Double): Double } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index adbe05811f..a74197278d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -17,13 +17,13 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} /** * :: Experimental :: * Class for calculating variance during regression - * @since 1.0.0 */ +@Since("1.0.0") @Experimental object Variance extends Impurity { @@ -33,8 +33,8 @@ object Variance extends Impurity { * @param counts Array[Double] with counts for each label * @param totalCount sum of counts for all labels * @return information value, or 0 if totalCount = 0 - * @since 1.1.0 */ + @Since("1.1.0") @DeveloperApi override def calculate(counts: Array[Double], totalCount: Double): Double = throw new UnsupportedOperationException("Variance.calculate") @@ -46,8 +46,8 @@ object Variance extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels * @return information value, or 0 if count = 0 - * @since 1.0.0 */ + @Since("1.0.0") @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = { if (count == 0) { @@ -60,8 +60,8 @@ object Variance extends Impurity { /** * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. 
- * @since 1.0.0 */ + @Since("1.0.0") def instance: this.type = this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala index c6e3d0d824..bab7b8c6ca 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel @@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The absolute (L1) error is defined as: * |y - F(x)| * where y is the label and F(x) is the model prediction for features x. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi object AbsoluteError extends Loss { @@ -41,8 +41,8 @@ object AbsoluteError extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient - * @since 1.2.0 */ + @Since("1.2.0") override def gradient(prediction: Double, label: Double): Double = { if (label - prediction < 0) 1.0 else -1.0 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala index eee58445a1..b2b4594712 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel import org.apache.spark.mllib.util.MLUtils @@ -31,8 +31,8 @@ import org.apache.spark.mllib.util.MLUtils * The log loss is defined as: * 2 log(1 + exp(-2 y F(x))) * where y is a label in {-1, 1} and F(x) is the model prediction for features x. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi object LogLoss extends Loss { @@ -43,8 +43,8 @@ object LogLoss extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient - * @since 1.2.0 */ + @Since("1.2.0") override def gradient(prediction: Double, label: Double): Double = { - 4.0 * label / (1.0 + math.exp(2.0 * label * prediction)) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala index 7c9fb92464..687cde325f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel import org.apache.spark.rdd.RDD @@ -26,8 +26,8 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: * Trait for adding "pluggable" loss functions for the gradient boosting algorithm. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi trait Loss extends Serializable { @@ -36,8 +36,8 @@ trait Loss extends Serializable { * @param prediction Predicted label. * @param label True label. * @return Loss gradient.
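The gradient formulas in the hunks above are easy to spot-check; the expected values below follow directly from the definitions shown (sign(F(x) - y) for absolute error, -4y / (1 + e^(2yF(x))) for log loss):

```scala
import org.apache.spark.mllib.tree.loss.{AbsoluteError, LogLoss}

AbsoluteError.gradient(prediction = 2.0, label = 3.0) // -1.0 (under-prediction)
AbsoluteError.gradient(prediction = 3.0, label = 2.0) //  1.0 (over-prediction)
LogLoss.gradient(prediction = 0.0, label = 1.0)       // -4 / (1 + e^0) = -2.0
```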
- * @since 1.2.0 */ + @Since("1.2.0") def gradient(prediction: Double, label: Double): Double /** @@ -47,8 +47,8 @@ trait Loss extends Serializable { * @param model Model of the weak learner. * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return Measure of model error on data - * @since 1.2.0 */ + @Since("1.2.0") def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { data.map(point => computeError(model.predict(point.features), point.label)).mean() } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala index 47dc94cde7..2b112fbe12 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.loss -/** - * @since 1.2.0 - */ +import org.apache.spark.annotation.Since + +@Since("1.2.0") object Losses { - /** - * @since 1.2.0 - */ + @Since("1.2.0") def fromString(name: String): Loss = name match { case "leastSquaresError" => SquaredError case "leastAbsoluteError" => AbsoluteError diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala index ff8903d695..3f7d3d38be 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.loss -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel @@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel * The squared (L2) error is defined as: * (y - F(x))**2 * where y is the label and F(x) is the model prediction for features x. - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi object SquaredError extends Loss { @@ -41,8 +41,8 @@ object SquaredError extends Loss { * @param prediction Predicted label. * @param label True label. * @return Loss gradient - * @since 1.2.0 */ + @Since("1.2.0") override def gradient(prediction: Double, label: Double): Double = { - 2.0 * (label - prediction) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 0f386a2660..3eefd135f7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -24,7 +24,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkContext} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.tree.configuration.{Algo, FeatureType} @@ -40,8 +40,8 @@ import org.apache.spark.util.Utils * This model stores the decision tree structure and parameters. 
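`Losses.fromString`, annotated above, is the lookup used when a loss is configured by name; only the two string keys shown in the hunk are accepted, and unrecognized names are rejected at runtime:

```scala
import org.apache.spark.mllib.tree.loss.Losses

val squared  = Losses.fromString("leastSquaresError")  // SquaredError
val absolute = Losses.fromString("leastAbsoluteError") // AbsoluteError
```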
* @param topNode root node * @param algo algorithm type -- classification or regression - * @since 1.0.0 */ +@Since("1.0.0") @Experimental class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable with Saveable { @@ -50,8 +50,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features array representing a single data point * @return Double prediction from the trained model - * @since 1.0.0 */ + @Since("1.0.0") def predict(features: Vector): Double = { topNode.predict(features) } @@ -61,8 +61,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features RDD representing data points to be predicted * @return RDD of predictions for each of the given data points - * @since 1.0.0 */ + @Since("1.0.0") def predict(features: RDD[Vector]): RDD[Double] = { features.map(x => predict(x)) } @@ -72,16 +72,16 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * * @param features JavaRDD representing data points to be predicted * @return JavaRDD of predictions for each of the given data points - * @since 1.2.0 */ + @Since("1.2.0") def predict(features: JavaRDD[Vector]): JavaRDD[Double] = { predict(features.rdd) } /** * Get number of nodes in tree, including leaf nodes. - * @since 1.1.0 */ + @Since("1.1.0") def numNodes: Int = { 1 + topNode.numDescendants } @@ -89,8 +89,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable /** * Get depth of tree. * E.g.: Depth 0 means 1 leaf node. Depth 1 means 1 internal node and 2 leaf nodes. - * @since 1.1.0 */ + @Since("1.1.0") def depth: Int = { topNode.subtreeDepth } @@ -119,8 +119,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable * @param sc Spark context used to save model data. * @param path Path specifying the directory in which to save this model. * If the directory already exists, this method throws an exception. - * @since 1.3.0 */ + @Since("1.3.0") override def save(sc: SparkContext, path: String): Unit = { DecisionTreeModel.SaveLoadV1_0.save(sc, path, this) } @@ -128,9 +128,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable override protected def formatVersion: String = DecisionTreeModel.formatVersion } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { private[spark] def formatVersion: String = "1.0" @@ -317,8 +315,8 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { * @param sc Spark context used for loading model files. * @param path Path specifying the directory to which the model was saved. 
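The `save` method annotated above and its `load` counterpart round-trip a model through a directory; a sketch, where `sc`, `model`, and the path are assumed:

```scala
import org.apache.spark.mllib.tree.model.DecisionTreeModel

// The target directory must not already exist; save() throws otherwise.
model.save(sc, "target/tmp/myDecisionTreeModel")
val sameModel = DecisionTreeModel.load(sc, "target/tmp/myDecisionTreeModel")
```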
* @return Model instance - * @since 1.3.0 */ + @Since("1.3.0") override def load(sc: SparkContext, path: String): DecisionTreeModel = { implicit val formats = DefaultFormats val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index 23f0363639..091a0462c2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.model -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.tree.impurity.ImpurityCalculator /** @@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.impurity.ImpurityCalculator * @param rightImpurity right node impurity * @param leftPredict left node predict * @param rightPredict right node predict - * @since 1.0.0 */ +@Since("1.0.0") @DeveloperApi class InformationGainStats( val gain: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index aca3350c2e..8c54c55107 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.model -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.Logging import org.apache.spark.mllib.tree.configuration.FeatureType._ import org.apache.spark.mllib.linalg.Vector @@ -38,8 +38,8 @@ import org.apache.spark.mllib.linalg.Vector * @param leftNode left child * @param rightNode right child * @param stats information gain stats - * @since 1.0.0 */ +@Since("1.0.0") @DeveloperApi class Node ( val id: Int, @@ -59,8 +59,8 @@ class Node ( /** * Build the left and right child nodes, if this node is not a leaf * @param nodes array of nodes - * @since 1.0.0 */ + @Since("1.0.0") @deprecated("build should no longer be used since trees are constructed on-the-fly in training", "1.2.0") def build(nodes: Array[Node]): Unit = { @@ -81,8 +81,8 @@ class Node ( * Predict the value for the given feature vector, recursing into child nodes if this node is not a leaf * @param features feature vector * @return predicted value - * @since 1.1.0 */ + @Since("1.1.0") def predict(features: Vector) : Double = { if (isLeaf) { predict.predict diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala index be819b59e7..965784051e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala @@ -17,14 +17,14 @@ package org.apache.spark.mllib.tree.model -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} /** * Predicted value for a node * @param predict predicted value * @param prob probability of the label (classification only) - * @since 1.2.0 */ +@Since("1.2.0") @DeveloperApi class Predict( val predict: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index 18d40530ae..45db83ae3a 100644 ---
a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.model -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType import org.apache.spark.mllib.tree.configuration.FeatureType import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType @@ -30,8 +30,8 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType * Split left if feature <= threshold, else right. * @param featureType type of feature -- categorical or continuous * @param categories Split left if categorical feature value is in this set, else right. - * @since 1.0.0 */ +@Since("1.0.0") @DeveloperApi case class Split( feature: Int, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 0c629b12a8..19571447a2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -25,7 +25,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkContext} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint @@ -45,8 +45,8 @@ import org.apache.spark.util.Utils * * @param algo algorithm for the ensemble model, either Classification or Regression * @param trees tree ensembles - * @since 1.2.0 */ +@Since("1.2.0") @Experimental class RandomForestModel(override val algo: Algo, override val trees: Array[DecisionTreeModel]) extends TreeEnsembleModel(algo, trees, Array.fill(trees.length)(1.0), @@ -60,8 +60,8 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis * @param sc Spark context used to save model data. * @param path Path specifying the directory in which to save this model. * If the directory already exists, this method throws an exception. - * @since 1.3.0 */ + @Since("1.3.0") override def save(sc: SparkContext, path: String): Unit = { TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this, RandomForestModel.SaveLoadV1_0.thisClassName) @@ -70,9 +70,7 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis override protected def formatVersion: String = RandomForestModel.formatVersion } -/** - * @since 1.3.0 - */ +@Since("1.3.0") object RandomForestModel extends Loader[RandomForestModel] { private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion @@ -82,8 +80,8 @@ object RandomForestModel extends Loader[RandomForestModel] { * @param sc Spark context used for loading model files. * @param path Path specifying the directory to which the model was saved. 
* @return Model instance - * @since 1.3.0 */ + @Since("1.3.0") override def load(sc: SparkContext, path: String): RandomForestModel = { val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path) val classNameV1_0 = SaveLoadV1_0.thisClassName @@ -114,8 +112,8 @@ object RandomForestModel extends Loader[RandomForestModel] { * @param algo algorithm for the ensemble model, either Classification or Regression * @param trees tree ensembles * @param treeWeights tree ensemble weights - * @since 1.2.0 */ +@Since("1.2.0") @Experimental class GradientBoostedTreesModel( override val algo: Algo, @@ -130,8 +128,8 @@ class GradientBoostedTreesModel( * @param sc Spark context used to save model data. * @param path Path specifying the directory in which to save this model. * If the directory already exists, this method throws an exception. - * @since 1.3.0 */ + @Since("1.3.0") override def save(sc: SparkContext, path: String): Unit = { TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this, GradientBoostedTreesModel.SaveLoadV1_0.thisClassName) @@ -143,8 +141,8 @@ class GradientBoostedTreesModel( * @param loss evaluation metric. * @return an array with index i having the losses or errors for the ensemble * containing the first i+1 trees - * @since 1.4.0 */ + @Since("1.4.0") def evaluateEachIteration( data: RDD[LabeledPoint], loss: Loss): Array[Double] = { @@ -186,8 +184,8 @@ } /** - * @since 1.3.0 */ +@Since("1.3.0") object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { /** @@ -199,8 +197,8 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { * @param loss evaluation metric. * @return an RDD with each element being a zip of the prediction and error * corresponding to every sample. - * @since 1.4.0 */ + @Since("1.4.0") def computeInitialPredictionAndError( data: RDD[LabeledPoint], initTreeWeight: Double, @@ -223,8 +221,8 @@ * @param loss evaluation metric. * @return an RDD with each element being a zip of the prediction and error * corresponding to each sample. - * @since 1.4.0 */ + @Since("1.4.0") def updatePredictionError( data: RDD[LabeledPoint], predictionAndError: RDD[(Double, Double)], @@ -248,8 +246,8 @@ * @param sc Spark context used for loading model files. * @param path Path specifying the directory to which the model was saved. * @return Model instance - * @since 1.3.0 */ + @Since("1.3.0") override def load(sc: SparkContext, path: String): GradientBoostedTreesModel = { val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path) val classNameV1_0 = SaveLoadV1_0.thisClassName diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala index f520b3a1b7..bcaacc1b1f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala @@ -24,7 +24,6 @@ package org.apache.spark.mllib * - information loss calculation with entropy and Gini for classification and * variance for regression, * - both continuous and categorical features.
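`evaluateEachIteration`, annotated above, makes the per-iteration behaviour of a boosted ensemble inspectable; a sketch, with `gbtModel` an assumed `GradientBoostedTreesModel` and `data` an assumed `RDD[LabeledPoint]`:

```scala
import org.apache.spark.mllib.tree.loss.SquaredError

// Element i is the error of the ensemble built from the first i + 1 trees.
val errors = gbtModel.evaluateEachIteration(data, SquaredError)
val bestNumTrees = errors.indexOf(errors.min) + 1
```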
- * @since 1.0.0 */ package object tree { } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 11ed23176f..4940974bf4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -21,7 +21,7 @@ import scala.reflect.ClassTag import breeze.linalg.{DenseVector => BDV, SparseVector => BSV} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.rdd.PartitionwiseSampledRDD @@ -64,8 +64,8 @@ object MLUtils { * feature dimensions. * @param minPartitions min number of partitions * @return labeled data stored as an RDD[LabeledPoint] - * @since 1.0.0 */ + @Since("1.0.0") def loadLibSVMFile( sc: SparkContext, path: String, @@ -115,9 +115,7 @@ object MLUtils { // Convenient methods for `loadLibSVMFile`. - /** - * @since 1.0.0 - */ + @Since("1.0.0") @deprecated("use method without multiclass argument, which no longer has effect", "1.1.0") def loadLibSVMFile( sc: SparkContext, @@ -130,17 +128,15 @@ object MLUtils { /** * Loads labeled data in the LIBSVM format into an RDD[LabeledPoint], with the default number of * partitions. - * @since 1.0.0 */ + @Since("1.0.0") def loadLibSVMFile( sc: SparkContext, path: String, numFeatures: Int): RDD[LabeledPoint] = loadLibSVMFile(sc, path, numFeatures, sc.defaultMinPartitions) - /** - * @since 1.0.0 - */ + @Since("1.0.0") @deprecated("use method without multiclass argument, which no longer has effect", "1.1.0") def loadLibSVMFile( sc: SparkContext, @@ -149,9 +145,7 @@ object MLUtils { numFeatures: Int): RDD[LabeledPoint] = loadLibSVMFile(sc, path, numFeatures) - /** - * @since 1.0.0 - */ + @Since("1.0.0") @deprecated("use method without multiclass argument, which no longer has effect", "1.1.0") def loadLibSVMFile( sc: SparkContext, @@ -162,8 +156,8 @@ object MLUtils { /** * Loads binary labeled data in the LIBSVM format into an RDD[LabeledPoint], with number of * features determined automatically and the default number of partitions. - * @since 1.0.0 */ + @Since("1.0.0") def loadLibSVMFile(sc: SparkContext, path: String): RDD[LabeledPoint] = loadLibSVMFile(sc, path, -1) @@ -193,15 +187,15 @@ object MLUtils { * @param path file or directory path in any Hadoop-supported file system URI * @param minPartitions min number of partitions * @return vectors stored as an RDD[Vector] - * @since 1.1.0 */ + @Since("1.1.0") def loadVectors(sc: SparkContext, path: String, minPartitions: Int): RDD[Vector] = sc.textFile(path, minPartitions).map(Vectors.parse) /** * Loads vectors saved using `RDD[Vector].saveAsTextFile` with the default number of partitions. - * @since 1.1.0 */ + @Since("1.1.0") def loadVectors(sc: SparkContext, path: String): RDD[Vector] = sc.textFile(path, sc.defaultMinPartitions).map(Vectors.parse) @@ -211,16 +205,16 @@ object MLUtils { * @param path file or directory path in any Hadoop-supported file system URI * @param minPartitions min number of partitions * @return labeled points stored as an RDD[LabeledPoint] - * @since 1.1.0 */ + @Since("1.1.0") def loadLabeledPoints(sc: SparkContext, path: String, minPartitions: Int): RDD[LabeledPoint] = sc.textFile(path, minPartitions).map(LabeledPoint.parse) /** * Loads labeled points saved using `RDD[LabeledPoint].saveAsTextFile` with the default number of * partitions. 
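Of the `loadLibSVMFile` overloads annotated above, the `(sc, path)` form is the common entry point (feature count inferred, default partitions); `sc` and the path are assumed:

```scala
import org.apache.spark.mllib.util.MLUtils

val examples = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
val numFeatures = examples.first().features.size // dimension was inferred
```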
- * @since 1.1.0 */ + @Since("1.1.0") def loadLabeledPoints(sc: SparkContext, dir: String): RDD[LabeledPoint] = loadLabeledPoints(sc, dir, sc.defaultMinPartitions) @@ -236,8 +230,8 @@ object MLUtils { * * @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and * [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading. - * @since 1.0.0 */ + @Since("1.0.0") @deprecated("Should use MLUtils.loadLabeledPoints instead.", "1.0.1") def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = { sc.textFile(dir).map { line => @@ -258,8 +252,8 @@ * * @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and * [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading. - * @since 1.0.0 */ + @Since("1.0.0") @deprecated("Should use RDD[LabeledPoint].saveAsTextFile instead.", "1.0.1") def saveLabeledData(data: RDD[LabeledPoint], dir: String) { val dataStr = data.map(x => x.label + "," + x.features.toArray.mkString(" ")) @@ -271,8 +265,8 @@ * Return a k-element array of pairs of RDDs with the first element of each pair * containing the training data, a complement of the validation data and the second * element, the validation data, containing a unique 1/kth of the data, where k = numFolds. - * @since 1.0.0 */ + @Since("1.0.0") @Experimental def kFold[T: ClassTag](rdd: RDD[T], numFolds: Int, seed: Int): Array[(RDD[T], RDD[T])] = { val numFoldsF = numFolds.toFloat @@ -287,8 +281,8 @@ /** * Returns a new vector with `1.0` (bias) appended to the input vector. - * @since 1.0.0 */ + @Since("1.0.0") def appendBias(vector: Vector): Vector = { vector match { case dv: DenseVector =>