author     MechCoder <manojkumarsivaraj334@gmail.com>    2015-08-21 14:19:24 -0700
committer  Xiangrui Meng <meng@databricks.com>    2015-08-21 14:19:33 -0700
commit     e7db8761bd47ed53a313eb74f901c95ca89e23fb (patch)
tree       59c6527318f987aa71143aa49fabeed014032ecc
parent     4e72839b7b1e0b925837b49534a07188a603d838 (diff)
[SPARK-9864] [DOC] [MLlib] [SQL] Replace @since in scaladoc with @Since annotation
Author: MechCoder <manojkumarsivaraj334@gmail.com>

Closes #8352 from MechCoder/since.

(cherry picked from commit f5b028ed2f1ad6de43c8b50ebf480e1b6c047035)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
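Every hunk below makes the same mechanical change: the @since version tag is removed from a scaladoc comment and an equivalent @Since("x.y.z") annotation is attached to the member itself; where the comment contained nothing but the tag, the whole comment is replaced by the annotation. A before/after sketch of the first hunk in ClassificationModel.scala illustrates the pattern (the Before/After trait names are invented here purely for side-by-side comparison; the real file declares a single ClassificationModel trait):

    package org.apache.spark.mllib.classification

    import org.apache.spark.annotation.Since
    import org.apache.spark.mllib.linalg.Vector
    import org.apache.spark.rdd.RDD

    // Before the patch: the introducing release lives only in the scaladoc text.
    trait ClassificationModelBefore extends Serializable {
      /**
       * @param testData RDD representing data points to be predicted
       * @return an RDD[Double] where each entry contains the corresponding prediction
       * @since 0.8.0
       */
      def predict(testData: RDD[Vector]): RDD[Double]
    }

    // After the patch: the release is carried by a Since annotation, and the
    // scaladoc keeps only the descriptive text.
    trait ClassificationModelAfter extends Serializable {
      /**
       * @param testData RDD representing data points to be predicted
       * @return an RDD[Double] where each entry contains the corresponding prediction
       */
      @Since("0.8.0")
      def predict(testData: RDD[Vector]): RDD[Double]
    }

Moving the version into an annotation keeps it attached to the symbol rather than to free-form comment text, so it survives comment edits and can be consumed by documentation tooling.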
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala | 30
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala | 28
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala | 28
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala | 28
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala | 50
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala | 27
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala | 56
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala | 69
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala | 24
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala | 38
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala | 35
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala | 26
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala | 20
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala | 9
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala | 14
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala | 20
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala | 22
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 106
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 90
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 88
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala | 40
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 38
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 39
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala | 22
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala | 28
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala | 24
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala | 22
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala | 25
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala | 25
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala | 12
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala | 25
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala | 18
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala | 12
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala | 24
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala | 19
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala | 30
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala | 28
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala | 20
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala | 20
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala | 12
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala | 24
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala | 22
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala | 26
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala | 1
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala | 36
68 files changed, 692 insertions, 862 deletions
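For readers unfamiliar with the annotation being introduced, org.apache.spark.annotation.Since (imported at the top of each touched file) is a string-valued Scala annotation. The sketch below is an illustrative assumption of that general shape, not Spark's actual definition:

    import scala.annotation.StaticAnnotation

    // Hypothetical stand-in for org.apache.spark.annotation.Since: a plain
    // static annotation recording the release in which a member first appeared.
    class Since(version: String) extends StaticAnnotation

    object SinceExample {
      // Usage matches the hunks below: the annotation replaces the "@since" tag.
      @Since("1.3.0")
      def setThreshold(threshold: Double): Unit = ()
    }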
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
index ba73024e3c..a29b425a71 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.classification
import org.json4s.{DefaultFormats, JValue}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD
@@ -36,8 +36,8 @@ trait ClassificationModel extends Serializable {
*
* @param testData RDD representing data points to be predicted
* @return an RDD[Double] where each entry contains the corresponding prediction
- * @since 0.8.0
*/
+ @Since("0.8.0")
def predict(testData: RDD[Vector]): RDD[Double]
/**
@@ -45,16 +45,16 @@ trait ClassificationModel extends Serializable {
*
* @param testData array representing a single data point
* @return predicted category from the trained model
- * @since 0.8.0
*/
+ @Since("0.8.0")
def predict(testData: Vector): Double
/**
* Predict values for examples stored in a JavaRDD.
* @param testData JavaRDD representing data points to be predicted
* @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
- * @since 0.8.0
*/
+ @Since("0.8.0")
def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 268642ac6a..e03e662227 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -18,7 +18,7 @@
package org.apache.spark.mllib.classification
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.classification.impl.GLMClassificationModel
import org.apache.spark.mllib.linalg.BLAS.dot
import org.apache.spark.mllib.linalg.{DenseVector, Vector}
@@ -85,8 +85,8 @@ class LogisticRegressionModel (
* in Binary Logistic Regression. An example with prediction score greater than or equal to
* this threshold is identified as an positive, and negative otherwise. The default value is 0.5.
* It is only used for binary classification.
- * @since 1.0.0
*/
+ @Since("1.0.0")
@Experimental
def setThreshold(threshold: Double): this.type = {
this.threshold = Some(threshold)
@@ -97,8 +97,8 @@ class LogisticRegressionModel (
* :: Experimental ::
* Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
* It is only used for binary classification.
- * @since 1.3.0
*/
+ @Since("1.3.0")
@Experimental
def getThreshold: Option[Double] = threshold
@@ -106,8 +106,8 @@ class LogisticRegressionModel (
* :: Experimental ::
* Clears the threshold so that `predict` will output raw prediction scores.
* It is only used for binary classification.
- * @since 1.0.0
*/
+ @Since("1.0.0")
@Experimental
def clearThreshold(): this.type = {
threshold = None
@@ -158,9 +158,7 @@ class LogisticRegressionModel (
}
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
numFeatures, numClasses, weights, intercept, threshold)
@@ -168,9 +166,7 @@ class LogisticRegressionModel (
override protected def formatVersion: String = "1.0"
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def toString: String = {
s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
}
@@ -178,9 +174,7 @@ class LogisticRegressionModel (
object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): LogisticRegressionModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -261,8 +255,8 @@ object LogisticRegressionWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -284,8 +278,8 @@ object LogisticRegressionWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param miniBatchFraction Fraction of data to be used per iteration.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -306,8 +300,8 @@ object LogisticRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a LogisticRegressionModel which has the weights and offset from training.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -324,8 +318,8 @@ object LogisticRegressionWithSGD {
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LogisticRegressionModel which has the weights and offset from training.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int): LogisticRegressionModel = {
@@ -361,8 +355,8 @@ class LogisticRegressionWithLBFGS
* Set the number of possible outcomes for k classes classification problem in
* Multinomial Logistic Regression.
* By default, it is binary logistic regression so k will be set to 2.
- * @since 1.3.0
*/
+ @Since("1.3.0")
@Experimental
def setNumClasses(numClasses: Int): this.type = {
require(numClasses > 1)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 2df91c0942..dab369207c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -25,6 +25,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.{Logging, SparkContext, SparkException}
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.{Loader, Saveable}
@@ -444,8 +445,8 @@ object NaiveBayes {
*
* @param input RDD of `(label, array of features)` pairs. Every vector should be a frequency
* vector or a count vector.
- * @since 0.9.0
*/
+ @Since("0.9.0")
def train(input: RDD[LabeledPoint]): NaiveBayesModel = {
new NaiveBayes().run(input)
}
@@ -460,8 +461,8 @@ object NaiveBayes {
* @param input RDD of `(label, array of features)` pairs. Every vector should be a frequency
* vector or a count vector.
* @param lambda The smoothing parameter
- * @since 0.9.0
*/
+ @Since("0.9.0")
def train(input: RDD[LabeledPoint], lambda: Double): NaiveBayesModel = {
new NaiveBayes(lambda, Multinomial).run(input)
}
@@ -483,8 +484,8 @@ object NaiveBayes {
*
* @param modelType The type of NB model to fit from the enumeration NaiveBayesModels, can be
* multinomial or bernoulli
- * @since 0.9.0
*/
+ @Since("0.9.0")
def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = {
require(supportedModelTypes.contains(modelType),
s"NaiveBayes was created with an unknown modelType: $modelType.")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 5b54feeb10..5f87269863 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -18,7 +18,7 @@
package org.apache.spark.mllib.classification
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.classification.impl.GLMClassificationModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.optimization._
@@ -46,8 +46,8 @@ class SVMModel (
* Sets the threshold that separates positive predictions from negative predictions. An example
* with prediction score greater than or equal to this threshold is identified as an positive,
* and negative otherwise. The default value is 0.0.
- * @since 1.3.0
*/
+ @Since("1.3.0")
@Experimental
def setThreshold(threshold: Double): this.type = {
this.threshold = Some(threshold)
@@ -57,16 +57,16 @@ class SVMModel (
/**
* :: Experimental ::
* Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
- * @since 1.3.0
*/
+ @Since("1.3.0")
@Experimental
def getThreshold: Option[Double] = threshold
/**
* :: Experimental ::
* Clears the threshold so that `predict` will output raw prediction scores.
- * @since 1.0.0
*/
+ @Since("1.0.0")
@Experimental
def clearThreshold(): this.type = {
threshold = None
@@ -84,9 +84,7 @@ class SVMModel (
}
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
numFeatures = weights.size, numClasses = 2, weights, intercept, threshold)
@@ -94,9 +92,7 @@ class SVMModel (
override protected def formatVersion: String = "1.0"
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def toString: String = {
s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
}
@@ -104,9 +100,7 @@ class SVMModel (
object SVMModel extends Loader[SVMModel] {
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): SVMModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -185,8 +179,8 @@ object SVMWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -209,8 +203,8 @@ object SVMWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -231,8 +225,8 @@ object SVMWithSGD {
* @param regParam Regularization parameter.
* @param numIterations Number of iterations of gradient descent to run.
* @return a SVMModel which has the weights and offset from training.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -250,8 +244,8 @@ object SVMWithSGD {
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
* @return a SVMModel which has the weights and offset from training.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(input: RDD[LabeledPoint], numIterations: Int): SVMModel = {
train(input, numIterations, 1.0, 0.01, 1.0)
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
index bc27b1fe73..fcc9dfecac 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.IndexedSeq
import breeze.linalg.{diag, DenseMatrix => BreezeMatrix, DenseVector => BDV, Vector => BV}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, Matrices, Vector, Vectors}
import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -62,8 +62,8 @@ class GaussianMixture private (
/**
* Constructs a default instance. The default parameters are {k: 2, convergenceTol: 0.01,
* maxIterations: 100, seed: random}.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def this() = this(2, 0.01, 100, Utils.random.nextLong())
// number of samples per cluster to use when initializing Gaussians
@@ -77,8 +77,8 @@ class GaussianMixture private (
* Set the initial GMM starting point, bypassing the random initialization.
* You must call setK() prior to calling this method, and the condition
* (model.k == this.k) must be met; failure will result in an IllegalArgumentException
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setInitialModel(model: GaussianMixtureModel): this.type = {
if (model.k == k) {
initialModel = Some(model)
@@ -90,14 +90,14 @@ class GaussianMixture private (
/**
* Return the user supplied initial GMM, if supplied
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getInitialModel: Option[GaussianMixtureModel] = initialModel
/**
* Set the number of Gaussians in the mixture model. Default: 2
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setK(k: Int): this.type = {
this.k = k
this
@@ -105,14 +105,14 @@ class GaussianMixture private (
/**
* Return the number of Gaussians in the mixture model
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getK: Int = k
/**
* Set the maximum number of iterations to run. Default: 100
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setMaxIterations(maxIterations: Int): this.type = {
this.maxIterations = maxIterations
this
@@ -120,15 +120,15 @@ class GaussianMixture private (
/**
* Return the maximum number of iterations to run
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getMaxIterations: Int = maxIterations
/**
* Set the largest change in log-likelihood at which convergence is
* considered to have occurred.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setConvergenceTol(convergenceTol: Double): this.type = {
this.convergenceTol = convergenceTol
this
@@ -137,14 +137,14 @@ class GaussianMixture private (
/**
* Return the largest change in log-likelihood at which convergence is
* considered to have occurred.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getConvergenceTol: Double = convergenceTol
/**
* Set the random seed
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setSeed(seed: Long): this.type = {
this.seed = seed
this
@@ -152,14 +152,14 @@ class GaussianMixture private (
/**
* Return the random seed
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getSeed: Long = seed
/**
* Perform expectation maximization
- * @since 1.3.0
*/
+ @Since("1.3.0")
def run(data: RDD[Vector]): GaussianMixtureModel = {
val sc = data.sparkContext
@@ -235,8 +235,8 @@ class GaussianMixture private (
/**
* Java-friendly version of [[run()]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def run(data: JavaRDD[Vector]): GaussianMixtureModel = run(data.rdd)
private def updateWeightsAndGaussians(
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 2fa0473737..1a10a8b624 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -24,7 +24,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.{Vector, Matrices, Matrix}
import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -43,8 +43,8 @@ import org.apache.spark.sql.{SQLContext, Row}
* the weight for Gaussian i, and weights.sum == 1
* @param gaussians Array of MultivariateGaussian where gaussians(i) represents
* the Multivariate Gaussian (Normal) Distribution for Gaussian i
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
class GaussianMixtureModel(
val weights: Array[Double],
@@ -54,23 +54,21 @@ class GaussianMixtureModel(
override protected def formatVersion = "1.0"
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def save(sc: SparkContext, path: String): Unit = {
GaussianMixtureModel.SaveLoadV1_0.save(sc, path, weights, gaussians)
}
/**
* Number of gaussians in mixture
- * @since 1.3.0
*/
+ @Since("1.3.0")
def k: Int = weights.length
/**
* Maps given points to their cluster indices.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def predict(points: RDD[Vector]): RDD[Int] = {
val responsibilityMatrix = predictSoft(points)
responsibilityMatrix.map(r => r.indexOf(r.max))
@@ -78,8 +76,8 @@ class GaussianMixtureModel(
/**
* Maps given point to its cluster index.
- * @since 1.5.0
*/
+ @Since("1.5.0")
def predict(point: Vector): Int = {
val r = computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
r.indexOf(r.max)
@@ -87,16 +85,16 @@ class GaussianMixtureModel(
/**
* Java-friendly version of [[predict()]]
- * @since 1.4.0
*/
+ @Since("1.4.0")
def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]
/**
* Given the input vectors, return the membership value of each vector
* to all mixture components.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def predictSoft(points: RDD[Vector]): RDD[Array[Double]] = {
val sc = points.sparkContext
val bcDists = sc.broadcast(gaussians)
@@ -108,8 +106,8 @@ class GaussianMixtureModel(
/**
* Given the input vector, return the membership values to all mixture components.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def predictSoft(point: Vector): Array[Double] = {
computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
}
@@ -133,9 +131,7 @@ class GaussianMixtureModel(
}
}
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
@Experimental
object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
@@ -186,9 +182,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
}
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def load(sc: SparkContext, path: String) : GaussianMixtureModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
implicit val formats = DefaultFormats
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 9ef6834e5e..3e9545a74b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
import scala.collection.mutable.ArrayBuffer
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.linalg.BLAS.{axpy, scal}
import org.apache.spark.mllib.util.MLUtils
@@ -49,20 +49,20 @@ class KMeans private (
/**
* Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
* initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4, seed: random}.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
/**
* Number of clusters to create (k).
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getK: Int = k
/**
* Set the number of clusters to create (k). Default: 2.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def setK(k: Int): this.type = {
this.k = k
this
@@ -70,14 +70,14 @@ class KMeans private (
/**
* Maximum number of iterations to run.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getMaxIterations: Int = maxIterations
/**
* Set maximum number of iterations to run. Default: 20.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def setMaxIterations(maxIterations: Int): this.type = {
this.maxIterations = maxIterations
this
@@ -85,16 +85,16 @@ class KMeans private (
/**
* The initialization algorithm. This can be either "random" or "k-means||".
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getInitializationMode: String = initializationMode
/**
* Set the initialization algorithm. This can be either "random" to choose random points as
* initial cluster centers, or "k-means||" to use a parallel variant of k-means++
* (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def setInitializationMode(initializationMode: String): this.type = {
KMeans.validateInitMode(initializationMode)
this.initializationMode = initializationMode
@@ -104,8 +104,8 @@ class KMeans private (
/**
* :: Experimental ::
* Number of runs of the algorithm to execute in parallel.
- * @since 1.4.0
*/
+ @Since("1.4.0")
@Experimental
def getRuns: Int = runs
@@ -114,8 +114,8 @@ class KMeans private (
* Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
* this many times with random starting conditions (configured by the initialization mode), then
* return the best clustering found over any run. Default: 1.
- * @since 0.8.0
*/
+ @Since("0.8.0")
@Experimental
def setRuns(runs: Int): this.type = {
if (runs <= 0) {
@@ -127,15 +127,15 @@ class KMeans private (
/**
* Number of steps for the k-means|| initialization mode
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getInitializationSteps: Int = initializationSteps
/**
* Set the number of steps for the k-means|| initialization mode. This is an advanced
* setting -- the default of 5 is almost always enough. Default: 5.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def setInitializationSteps(initializationSteps: Int): this.type = {
if (initializationSteps <= 0) {
throw new IllegalArgumentException("Number of initialization steps must be positive")
@@ -146,15 +146,15 @@ class KMeans private (
/**
* The distance threshold within which we've consider centers to have converged.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getEpsilon: Double = epsilon
/**
* Set the distance threshold within which we've consider centers to have converged.
* If all centers move less than this Euclidean distance, we stop iterating one run.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def setEpsilon(epsilon: Double): this.type = {
this.epsilon = epsilon
this
@@ -162,14 +162,14 @@ class KMeans private (
/**
* The random seed for cluster initialization.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getSeed: Long = seed
/**
* Set the random seed for cluster initialization.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setSeed(seed: Long): this.type = {
this.seed = seed
this
@@ -183,8 +183,8 @@ class KMeans private (
* Set the initial starting point, bypassing the random initialization or k-means||
* The condition model.k == this.k must be met, failure results
* in an IllegalArgumentException.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setInitialModel(model: KMeansModel): this.type = {
require(model.k == k, "mismatched cluster count")
initialModel = Some(model)
@@ -194,8 +194,8 @@ class KMeans private (
/**
* Train a K-means model on the given set of points; `data` should be cached for high
* performance, because this is an iterative algorithm.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def run(data: RDD[Vector]): KMeansModel = {
if (data.getStorageLevel == StorageLevel.NONE) {
@@ -453,14 +453,14 @@ class KMeans private (
/**
* Top-level methods for calling K-means clustering.
- * @since 0.8.0
*/
+@Since("0.8.0")
object KMeans {
// Initialization mode names
- /** @since 0.8.0 */
+ @Since("0.8.0")
val RANDOM = "random"
- /** @since 0.8.0 */
+ @Since("0.8.0")
val K_MEANS_PARALLEL = "k-means||"
/**
@@ -472,8 +472,8 @@ object KMeans {
* @param runs number of parallel runs, defaults to 1. The best model is returned.
* @param initializationMode initialization model, either "random" or "k-means||" (default).
* @param seed random seed value for cluster initialization
- * @since 1.3.0
*/
+ @Since("1.3.0")
def train(
data: RDD[Vector],
k: Int,
@@ -497,8 +497,8 @@ object KMeans {
* @param maxIterations max number of iterations
* @param runs number of parallel runs, defaults to 1. The best model is returned.
* @param initializationMode initialization model, either "random" or "k-means||" (default).
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
data: RDD[Vector],
k: Int,
@@ -514,8 +514,8 @@ object KMeans {
/**
* Trains a k-means model using specified parameters and the default values for unspecified.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
data: RDD[Vector],
k: Int,
@@ -525,8 +525,8 @@ object KMeans {
/**
* Trains a k-means model using specified parameters and the default values for unspecified.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
data: RDD[Vector],
k: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index 23ea3672c4..13fc4a81ff 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -23,6 +23,7 @@ import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.pmml.PMMLExportable
@@ -34,35 +35,35 @@ import org.apache.spark.sql.Row
/**
* A clustering model for K-means. Each point belongs to the cluster with the closest center.
- * @since 0.8.0
*/
+@Since("0.8.0")
class KMeansModel (
val clusterCenters: Array[Vector]) extends Saveable with Serializable with PMMLExportable {
/**
* A Java-friendly constructor that takes an Iterable of Vectors.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def this(centers: java.lang.Iterable[Vector]) = this(centers.asScala.toArray)
/**
* Total number of clusters.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def k: Int = clusterCenters.length
/**
* Returns the cluster index that a given point belongs to.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def predict(point: Vector): Int = {
KMeans.findClosest(clusterCentersWithNorm, new VectorWithNorm(point))._1
}
/**
* Maps given points to their cluster indices.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(points: RDD[Vector]): RDD[Int] = {
val centersWithNorm = clusterCentersWithNorm
val bcCentersWithNorm = points.context.broadcast(centersWithNorm)
@@ -71,16 +72,16 @@ class KMeansModel (
/**
* Maps given points to their cluster indices.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]
/**
* Return the K-means cost (sum of squared distances of points to their nearest center) for this
* model on the given data.
- * @since 0.8.0
*/
+ @Since("0.8.0")
def computeCost(data: RDD[Vector]): Double = {
val centersWithNorm = clusterCentersWithNorm
val bcCentersWithNorm = data.context.broadcast(centersWithNorm)
@@ -90,9 +91,7 @@ class KMeansModel (
private def clusterCentersWithNorm: Iterable[VectorWithNorm] =
clusterCenters.map(new VectorWithNorm(_))
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def save(sc: SparkContext, path: String): Unit = {
KMeansModel.SaveLoadV1_0.save(sc, this, path)
}
@@ -100,14 +99,10 @@ class KMeansModel (
override protected def formatVersion: String = "1.0"
}
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
object KMeansModel extends Loader[KMeansModel] {
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def load(sc: SparkContext, path: String): KMeansModel = {
KMeansModel.SaveLoadV1_0.load(sc, path)
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
index 2a8c6acbae..92a321afb0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.Logging
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
import org.apache.spark.api.java.JavaPairRDD
import org.apache.spark.graphx._
import org.apache.spark.mllib.linalg.{Vector, Vectors}
@@ -43,8 +43,8 @@ import org.apache.spark.util.Utils
*
* @see [[http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation Latent Dirichlet allocation
* (Wikipedia)]]
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
class LDA private (
private var k: Int,
@@ -57,8 +57,8 @@ class LDA private (
/**
* Constructs a LDA instance with default parameters.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def this() = this(k = 10, maxIterations = 20, docConcentration = Vectors.dense(-1),
topicConcentration = -1, seed = Utils.random.nextLong(), checkpointInterval = 10,
ldaOptimizer = new EMLDAOptimizer)
@@ -66,15 +66,15 @@ class LDA private (
/**
* Number of topics to infer. I.e., the number of soft cluster centers.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getK: Int = k
/**
* Number of topics to infer. I.e., the number of soft cluster centers.
* (default = 10)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setK(k: Int): this.type = {
require(k > 0, s"LDA k (number of clusters) must be > 0, but was set to $k")
this.k = k
@@ -86,8 +86,8 @@ class LDA private (
* distributions over topics ("theta").
*
* This is the parameter to a Dirichlet distribution.
- * @since 1.5.0
*/
+ @Since("1.5.0")
def getAsymmetricDocConcentration: Vector = this.docConcentration
/**
@@ -96,8 +96,8 @@ class LDA private (
*
* This method assumes the Dirichlet distribution is symmetric and can be described by a single
* [[Double]] parameter. It should fail if docConcentration is asymmetric.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getDocConcentration: Double = {
val parameter = docConcentration(0)
if (docConcentration.size == 1) {
@@ -131,8 +131,8 @@ class LDA private (
* - Values should be >= 0
* - default = uniformly (1.0 / k), following the implementation from
* [[https://github.com/Blei-Lab/onlineldavb]].
- * @since 1.5.0
*/
+ @Since("1.5.0")
def setDocConcentration(docConcentration: Vector): this.type = {
require(docConcentration.size > 0, "docConcentration must have > 0 elements")
this.docConcentration = docConcentration
@@ -141,8 +141,8 @@ class LDA private (
/**
* Replicates a [[Double]] docConcentration to create a symmetric prior.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setDocConcentration(docConcentration: Double): this.type = {
this.docConcentration = Vectors.dense(docConcentration)
this
@@ -150,26 +150,26 @@ class LDA private (
/**
* Alias for [[getAsymmetricDocConcentration]]
- * @since 1.5.0
*/
+ @Since("1.5.0")
def getAsymmetricAlpha: Vector = getAsymmetricDocConcentration
/**
* Alias for [[getDocConcentration]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getAlpha: Double = getDocConcentration
/**
* Alias for [[setDocConcentration()]]
- * @since 1.5.0
*/
+ @Since("1.5.0")
def setAlpha(alpha: Vector): this.type = setDocConcentration(alpha)
/**
* Alias for [[setDocConcentration()]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setAlpha(alpha: Double): this.type = setDocConcentration(alpha)
/**
@@ -180,8 +180,8 @@ class LDA private (
*
* Note: The topics' distributions over terms are called "beta" in the original LDA paper
* by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getTopicConcentration: Double = this.topicConcentration
/**
@@ -205,8 +205,8 @@ class LDA private (
* - Value should be >= 0
* - default = (1.0 / k), following the implementation from
* [[https://github.com/Blei-Lab/onlineldavb]].
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setTopicConcentration(topicConcentration: Double): this.type = {
this.topicConcentration = topicConcentration
this
@@ -214,27 +214,27 @@ class LDA private (
/**
* Alias for [[getTopicConcentration]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getBeta: Double = getTopicConcentration
/**
* Alias for [[setTopicConcentration()]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setBeta(beta: Double): this.type = setTopicConcentration(beta)
/**
* Maximum number of iterations for learning.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getMaxIterations: Int = maxIterations
/**
* Maximum number of iterations for learning.
* (default = 20)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setMaxIterations(maxIterations: Int): this.type = {
this.maxIterations = maxIterations
this
@@ -242,14 +242,14 @@ class LDA private (
/**
* Random seed
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getSeed: Long = seed
/**
* Random seed
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setSeed(seed: Long): this.type = {
this.seed = seed
this
@@ -257,8 +257,8 @@ class LDA private (
/**
* Period (in iterations) between checkpoints.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def getCheckpointInterval: Int = checkpointInterval
/**
@@ -268,8 +268,8 @@ class LDA private (
* [[org.apache.spark.SparkContext]], this setting is ignored.
*
* @see [[org.apache.spark.SparkContext#setCheckpointDir]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setCheckpointInterval(checkpointInterval: Int): this.type = {
this.checkpointInterval = checkpointInterval
this
@@ -280,8 +280,8 @@ class LDA private (
* :: DeveloperApi ::
*
* LDAOptimizer used to perform the actual calculation
- * @since 1.4.0
*/
+ @Since("1.4.0")
@DeveloperApi
def getOptimizer: LDAOptimizer = ldaOptimizer
@@ -289,8 +289,8 @@ class LDA private (
* :: DeveloperApi ::
*
* LDAOptimizer used to perform the actual calculation (default = EMLDAOptimizer)
- * @since 1.4.0
*/
+ @Since("1.4.0")
@DeveloperApi
def setOptimizer(optimizer: LDAOptimizer): this.type = {
this.ldaOptimizer = optimizer
@@ -300,8 +300,8 @@ class LDA private (
/**
* Set the LDAOptimizer used to perform the actual calculation by algorithm name.
* Currently "em", "online" are supported.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setOptimizer(optimizerName: String): this.type = {
this.ldaOptimizer =
optimizerName.toLowerCase match {
@@ -321,8 +321,8 @@ class LDA private (
* (where the vocabulary size is the length of the vector).
* Document IDs must be unique and >= 0.
* @return Inferred LDA model
- * @since 1.3.0
*/
+ @Since("1.3.0")
def run(documents: RDD[(Long, Vector)]): LDAModel = {
val state = ldaOptimizer.initialize(documents, this)
var iter = 0
@@ -339,8 +339,8 @@ class LDA private (
/**
* Java-friendly version of [[run()]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def run(documents: JavaPairRDD[java.lang.Long, Vector]): LDAModel = {
run(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 6bc68a4c18..667374a2bc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -192,24 +192,16 @@ class LocalLDAModel private[clustering] (
override protected[clustering] val gammaShape: Double = 100)
extends LDAModel with Serializable {
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def k: Int = topics.numCols
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def vocabSize: Int = topics.numRows
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def topicsMatrix: Matrix = topics
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])] = {
val brzTopics = topics.toBreeze.toDenseMatrix
Range(0, k).map { topicIndex =>
@@ -222,9 +214,7 @@ class LocalLDAModel private[clustering] (
override protected def formatVersion = "1.0"
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def save(sc: SparkContext, path: String): Unit = {
LocalLDAModel.SaveLoadV1_0.save(sc, path, topicsMatrix, docConcentration, topicConcentration,
gammaShape)
@@ -238,16 +228,16 @@ class LocalLDAModel private[clustering] (
*
* @param documents test corpus to use for calculating log likelihood
* @return variational lower bound on the log likelihood of the entire corpus
- * @since 1.5.0
*/
+ @Since("1.5.0")
def logLikelihood(documents: RDD[(Long, Vector)]): Double = logLikelihoodBound(documents,
docConcentration, topicConcentration, topicsMatrix.toBreeze.toDenseMatrix, gammaShape, k,
vocabSize)
/**
* Java-friendly version of [[logLikelihood]]
- * @since 1.5.0
*/
+ @Since("1.5.0")
def logLikelihood(documents: JavaPairRDD[java.lang.Long, Vector]): Double = {
logLikelihood(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
}
@@ -258,8 +248,8 @@ class LocalLDAModel private[clustering] (
*
* @param documents test corpus to use for calculating perplexity
* @return Variational upper bound on log perplexity per token.
- * @since 1.5.0
*/
+ @Since("1.5.0")
def logPerplexity(documents: RDD[(Long, Vector)]): Double = {
val corpusTokenCount = documents
.map { case (_, termCounts) => termCounts.toArray.sum }
@@ -267,9 +257,8 @@ class LocalLDAModel private[clustering] (
-logLikelihood(documents) / corpusTokenCount
}
- /** Java-friendly version of [[logPerplexity]]
- * @since 1.5.0
- */
+ /** Java-friendly version of [[logPerplexity]] */
+ @Since("1.5.0")
def logPerplexity(documents: JavaPairRDD[java.lang.Long, Vector]): Double = {
logPerplexity(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
}
@@ -347,8 +336,8 @@ class LocalLDAModel private[clustering] (
* for each document.
* @param documents documents to predict topic mixture distributions for
* @return An RDD of (document ID, topic mixture distribution for document)
- * @since 1.3.0
*/
+ @Since("1.3.0")
// TODO: declare in LDAModel and override once implemented in DistributedLDAModel
def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = {
// Double transpose because dirichletExpectation normalizes by row and we need to normalize
@@ -376,8 +365,8 @@ class LocalLDAModel private[clustering] (
/**
* Java-friendly version of [[topicDistributions]]
- * @since 1.4.1
*/
+ @Since("1.4.1")
def topicDistributions(
documents: JavaPairRDD[java.lang.Long, Vector]): JavaPairRDD[java.lang.Long, Vector] = {
val distributions = topicDistributions(documents.rdd.asInstanceOf[RDD[(Long, Vector)]])
@@ -451,9 +440,7 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
}
}
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def load(sc: SparkContext, path: String): LocalLDAModel = {
val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path)
implicit val formats = DefaultFormats
@@ -510,8 +497,8 @@ class DistributedLDAModel private[clustering] (
* Convert model to a local model.
* The local model stores the inferred topics but not the topic distributions for training
* documents.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def toLocal: LocalLDAModel = new LocalLDAModel(topicsMatrix, docConcentration, topicConcentration,
gammaShape)
@@ -521,8 +508,8 @@ class DistributedLDAModel private[clustering] (
* No guarantees are given about the ordering of the topics.
*
* WARNING: This matrix is collected from an RDD. Beware memory usage when vocabSize, k are large.
- * @since 1.3.0
*/
+ @Since("1.3.0")
override lazy val topicsMatrix: Matrix = {
// Collect row-major topics
val termTopicCounts: Array[(Int, TopicCounts)] =
@@ -541,9 +528,7 @@ class DistributedLDAModel private[clustering] (
Matrices.fromBreeze(brzTopics)
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])] = {
val numTopics = k
// Note: N_k is not needed to find the top terms, but it is needed to normalize weights
@@ -582,8 +567,8 @@ class DistributedLDAModel private[clustering] (
* @return Array over topics. Each element represent as a pair of matching arrays:
* (IDs for the documents, weights of the topic in these documents).
* For each topic, documents are sorted in order of decreasing topic weights.
- * @since 1.5.0
*/
+ @Since("1.5.0")
def topDocumentsPerTopic(maxDocumentsPerTopic: Int): Array[(Array[Long], Array[Double])] = {
val numTopics = k
val topicsInQueues: Array[BoundedPriorityQueue[(Double, Long)]] =
@@ -666,8 +651,8 @@ class DistributedLDAModel private[clustering] (
* - This excludes the prior; for that, use [[logPrior]].
* - Even with [[logPrior]], this is NOT the same as the data log likelihood given the
* hyperparameters.
- * @since 1.3.0
*/
+ @Since("1.3.0")
lazy val logLikelihood: Double = {
// TODO: generalize this for asymmetric (non-scalar) alpha
val alpha = this.docConcentration(0) // To avoid closure capture of enclosing object
@@ -693,8 +678,8 @@ class DistributedLDAModel private[clustering] (
/**
* Log probability of the current parameter estimate:
* log P(topics, topic distributions for docs | alpha, eta)
- * @since 1.3.0
*/
+ @Since("1.3.0")
lazy val logPrior: Double = {
// TODO: generalize this for asymmetric (non-scalar) alpha
val alpha = this.docConcentration(0) // To avoid closure capture of enclosing object
@@ -725,8 +710,8 @@ class DistributedLDAModel private[clustering] (
* ("theta_doc").
*
* @return RDD of (document ID, topic distribution) pairs
- * @since 1.3.0
*/
+ @Since("1.3.0")
def topicDistributions: RDD[(Long, Vector)] = {
graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
(docID.toLong, Vectors.fromBreeze(normalize(topicCounts, 1.0)))
@@ -735,8 +720,8 @@ class DistributedLDAModel private[clustering] (
/**
* Java-friendly version of [[topicDistributions]]
- * @since 1.4.1
*/
+ @Since("1.4.1")
def javaTopicDistributions: JavaPairRDD[java.lang.Long, Vector] = {
JavaPairRDD.fromRDD(topicDistributions.asInstanceOf[RDD[(java.lang.Long, Vector)]])
}
@@ -744,8 +729,8 @@ class DistributedLDAModel private[clustering] (
/**
* For each document, return the top k weighted topics for that document and their weights.
* @return RDD of (doc ID, topic indices, topic weights)
- * @since 1.5.0
*/
+ @Since("1.5.0")
def topTopicsPerDocument(k: Int): RDD[(Long, Array[Int], Array[Double])] = {
graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
val topIndices = argtopk(topicCounts, k)
@@ -761,8 +746,8 @@ class DistributedLDAModel private[clustering] (
/**
* Java-friendly version of [[topTopicsPerDocument]]
- * @since 1.5.0
*/
+ @Since("1.5.0")
def javaTopTopicsPerDocument(k: Int): JavaRDD[(java.lang.Long, Array[Int], Array[Double])] = {
val topics = topTopicsPerDocument(k)
topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Double])]].toJavaRDD()
@@ -775,8 +760,8 @@ class DistributedLDAModel private[clustering] (
/**
* Java-friendly version of [[topicDistributions]]
- * @since 1.5.0
*/
+ @Since("1.5.0")
override def save(sc: SparkContext, path: String): Unit = {
DistributedLDAModel.SaveLoadV1_0.save(
sc, path, graph, globalTopicTotals, k, vocabSize, docConcentration, topicConcentration,
@@ -877,9 +862,7 @@ object DistributedLDAModel extends Loader[DistributedLDAModel] {
}
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def load(sc: SparkContext, path: String): DistributedLDAModel = {
val (loadedClassName, loadedVersion, metadata) = Loader.loadMetadata(sc, path)
implicit val formats = DefaultFormats
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index cb517f9689..5c2aae6403 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -23,7 +23,7 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, all, normalize, su
import breeze.numerics.{trigamma, abs, exp}
import breeze.stats.distributions.{Gamma, RandBasis}
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.graphx._
import org.apache.spark.graphx.impl.GraphImpl
import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer
@@ -35,8 +35,8 @@ import org.apache.spark.rdd.RDD
*
* An LDAOptimizer specifies which optimization/learning/inference algorithm to use, and it can
* hold optimizer-specific parameters for users to set.
- * @since 1.4.0
*/
+@Since("1.4.0")
@DeveloperApi
sealed trait LDAOptimizer {
@@ -74,8 +74,8 @@ sealed trait LDAOptimizer {
* - Paper which clearly explains several algorithms, including EM:
* Asuncion, Welling, Smyth, and Teh.
* "On Smoothing and Inference for Topic Models." UAI, 2009.
- * @since 1.4.0
*/
+@Since("1.4.0")
@DeveloperApi
final class EMLDAOptimizer extends LDAOptimizer {
@@ -226,8 +226,8 @@ final class EMLDAOptimizer extends LDAOptimizer {
*
* Original Online LDA paper:
* Hoffman, Blei and Bach, "Online Learning for Latent Dirichlet Allocation." NIPS, 2010.
- * @since 1.4.0
*/
+@Since("1.4.0")
@DeveloperApi
final class OnlineLDAOptimizer extends LDAOptimizer {
@@ -276,16 +276,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
/**
* A (positive) learning parameter that downweights early iterations. Larger values make early
* iterations count less.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getTau0: Double = this.tau0
/**
* A (positive) learning parameter that downweights early iterations. Larger values make early
* iterations count less.
* Default: 1024, following the original Online LDA paper.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setTau0(tau0: Double): this.type = {
require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0")
this.tau0 = tau0
@@ -294,16 +294,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
/**
* Learning rate: exponential decay rate
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getKappa: Double = this.kappa
/**
* Learning rate: exponential decay rate---should be between
* (0.5, 1.0] to guarantee asymptotic convergence.
* Default: 0.51, based on the original Online LDA paper.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setKappa(kappa: Double): this.type = {
require(kappa >= 0, s"Online LDA kappa must be nonnegative, but was set to $kappa")
this.kappa = kappa
@@ -312,8 +312,8 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
/**
* Mini-batch fraction, which sets the fraction of document sampled and used in each iteration
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getMiniBatchFraction: Double = this.miniBatchFraction
/**
@@ -325,8 +325,8 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
* maxIterations * miniBatchFraction >= 1.
*
* Default: 0.05, i.e., 5% of total documents.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
require(miniBatchFraction > 0.0 && miniBatchFraction <= 1.0,
s"Online LDA miniBatchFraction must be in range (0,1], but was set to $miniBatchFraction")
@@ -337,16 +337,16 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
/**
* Optimize alpha, indicates whether alpha (Dirichlet parameter for document-topic distribution)
* will be optimized during training.
- * @since 1.5.0
*/
+ @Since("1.5.0")
def getOptimzeAlpha: Boolean = this.optimizeAlpha
/**
* Sets whether to optimize alpha parameter during training.
*
* Default: false
- * @since 1.5.0
*/
+ @Since("1.5.0")
def setOptimzeAlpha(optimizeAlpha: Boolean): this.type = {
this.optimizeAlpha = optimizeAlpha
this
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
index b4733ca975..396b36f2f6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -21,7 +21,7 @@ import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.graphx._
import org.apache.spark.graphx.impl.GraphImpl
@@ -39,16 +39,14 @@ import org.apache.spark.{Logging, SparkContext, SparkException}
*
* @param k number of clusters
* @param assignments an RDD of clustering [[PowerIterationClustering#Assignment]]s
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
class PowerIterationClusteringModel(
val k: Int,
val assignments: RDD[PowerIterationClustering.Assignment]) extends Saveable with Serializable {
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def save(sc: SparkContext, path: String): Unit = {
PowerIterationClusteringModel.SaveLoadV1_0.save(sc, this, path)
}
@@ -56,9 +54,7 @@ class PowerIterationClusteringModel(
override protected def formatVersion: String = "1.0"
}
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
object PowerIterationClusteringModel extends Loader[PowerIterationClusteringModel] {
override def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
PowerIterationClusteringModel.SaveLoadV1_0.load(sc, path)
@@ -73,8 +69,8 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
val thisClassName = "org.apache.spark.mllib.clustering.PowerIterationClusteringModel"
/**
- * @since 1.4.0
*/
+ @Since("1.4.0")
def save(sc: SparkContext, model: PowerIterationClusteringModel, path: String): Unit = {
val sqlContext = new SQLContext(sc)
import sqlContext.implicits._
@@ -87,9 +83,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
dataRDD.write.parquet(Loader.dataPath(path))
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
implicit val formats = DefaultFormats
val sqlContext = new SQLContext(sc)
@@ -136,14 +130,14 @@ class PowerIterationClustering private[clustering] (
/**
* Constructs a PIC instance with default parameters: {k: 2, maxIterations: 100,
* initMode: "random"}.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def this() = this(k = 2, maxIterations = 100, initMode = "random")
/**
* Set the number of clusters.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setK(k: Int): this.type = {
this.k = k
this
@@ -151,8 +145,8 @@ class PowerIterationClustering private[clustering] (
/**
* Set maximum number of iterations of the power iteration loop
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setMaxIterations(maxIterations: Int): this.type = {
this.maxIterations = maxIterations
this
@@ -161,8 +155,8 @@ class PowerIterationClustering private[clustering] (
/**
* Set the initialization mode. This can be either "random" to use a random vector
* as vertex properties, or "degree" to use normalized sum similarities. Default: random.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setInitializationMode(mode: String): this.type = {
this.initMode = mode match {
case "random" | "degree" => mode
@@ -182,8 +176,8 @@ class PowerIterationClustering private[clustering] (
* assume s,,ij,, = 0.0.
*
* @return a [[PowerIterationClusteringModel]] that contains the clustering result
- * @since 1.5.0
*/
+ @Since("1.5.0")
def run(graph: Graph[Double, Double]): PowerIterationClusteringModel = {
val w = normalize(graph)
val w0 = initMode match {
@@ -204,8 +198,8 @@ class PowerIterationClustering private[clustering] (
* assume s,,ij,, = 0.0.
*
* @return a [[PowerIterationClusteringModel]] that contains the clustering result
- * @since 1.3.0
*/
+ @Since("1.3.0")
def run(similarities: RDD[(Long, Long, Double)]): PowerIterationClusteringModel = {
val w = normalize(similarities)
val w0 = initMode match {
@@ -217,8 +211,8 @@ class PowerIterationClustering private[clustering] (
/**
* A Java-friendly version of [[PowerIterationClustering.run]].
- * @since 1.3.0
*/
+ @Since("1.3.0")
def run(similarities: JavaRDD[(java.lang.Long, java.lang.Long, java.lang.Double)])
: PowerIterationClusteringModel = {
run(similarities.rdd.asInstanceOf[RDD[(Long, Long, Double)]])
@@ -242,9 +236,7 @@ class PowerIterationClustering private[clustering] (
}
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
@Experimental
object PowerIterationClustering extends Logging {
@@ -253,8 +245,8 @@ object PowerIterationClustering extends Logging {
* Cluster assignment.
* @param id node id
* @param cluster assigned cluster id
- * @since 1.3.0
*/
+ @Since("1.3.0")
@Experimental
case class Assignment(id: Long, cluster: Int)
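A minimal usage sketch of the PowerIterationClustering API annotated in this file, assuming an existing SparkContext `sc`; the similarity triplets are invented for illustration.

    import org.apache.spark.mllib.clustering.PowerIterationClustering

    val similarities = sc.parallelize(Seq(
      (0L, 1L, 0.9), (1L, 2L, 0.9), (2L, 3L, 0.1), (3L, 4L, 0.8)))
    val model = new PowerIterationClustering()
      .setK(2)
      .setMaxIterations(20)
      .setInitializationMode("degree")
      .run(similarities)
    model.assignments.collect().foreach { a =>
      println(s"node ${a.id} -> cluster ${a.cluster}")
    }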
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
index a915804b02..41f2668ec6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
import scala.reflect.ClassTag
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaSparkContext._
import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.rdd.RDD
@@ -63,9 +63,8 @@ import org.apache.spark.util.random.XORShiftRandom
* such that at time t + h the discount applied to the data from t is 0.5.
* The definition remains the same whether the time unit is given
* as batches or points.
- * @since 1.2.0
- *
*/
+@Since("1.2.0")
@Experimental
class StreamingKMeansModel(
override val clusterCenters: Array[Vector],
@@ -73,8 +72,8 @@ class StreamingKMeansModel(
/**
* Perform a k-means update on a batch of data.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def update(data: RDD[Vector], decayFactor: Double, timeUnit: String): StreamingKMeansModel = {
// find nearest cluster to each point
@@ -166,23 +165,23 @@ class StreamingKMeansModel(
* .setRandomCenters(5, 100.0)
* .trainOn(DStream)
* }}}
- * @since 1.2.0
*/
+@Since("1.2.0")
@Experimental
class StreamingKMeans(
var k: Int,
var decayFactor: Double,
var timeUnit: String) extends Logging with Serializable {
- /** @since 1.2.0 */
+ @Since("1.2.0")
def this() = this(2, 1.0, StreamingKMeans.BATCHES)
protected var model: StreamingKMeansModel = new StreamingKMeansModel(null, null)
/**
* Set the number of clusters.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def setK(k: Int): this.type = {
this.k = k
this
@@ -190,8 +189,8 @@ class StreamingKMeans(
/**
* Set the decay factor directly (for forgetful algorithms).
- * @since 1.2.0
*/
+ @Since("1.2.0")
def setDecayFactor(a: Double): this.type = {
this.decayFactor = a
this
@@ -199,8 +198,8 @@ class StreamingKMeans(
/**
* Set the half life and time unit ("batches" or "points") for forgetful algorithms.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def setHalfLife(halfLife: Double, timeUnit: String): this.type = {
if (timeUnit != StreamingKMeans.BATCHES && timeUnit != StreamingKMeans.POINTS) {
throw new IllegalArgumentException("Invalid time unit for decay: " + timeUnit)
@@ -213,8 +212,8 @@ class StreamingKMeans(
/**
* Specify initial centers directly.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def setInitialCenters(centers: Array[Vector], weights: Array[Double]): this.type = {
model = new StreamingKMeansModel(centers, weights)
this
@@ -226,8 +225,8 @@ class StreamingKMeans(
* @param dim Number of dimensions
* @param weight Weight for each center
* @param seed Random seed
- * @since 1.2.0
*/
+ @Since("1.2.0")
def setRandomCenters(dim: Int, weight: Double, seed: Long = Utils.random.nextLong): this.type = {
val random = new XORShiftRandom(seed)
val centers = Array.fill(k)(Vectors.dense(Array.fill(dim)(random.nextGaussian())))
@@ -238,8 +237,8 @@ class StreamingKMeans(
/**
* Return the latest model.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def latestModel(): StreamingKMeansModel = {
model
}
@@ -251,8 +250,8 @@ class StreamingKMeans(
* and updates the model using each batch of data from the stream.
*
* @param data DStream containing vector data
- * @since 1.2.0
*/
+ @Since("1.2.0")
def trainOn(data: DStream[Vector]) {
assertInitialized()
data.foreachRDD { (rdd, time) =>
@@ -262,8 +261,8 @@ class StreamingKMeans(
/**
* Java-friendly version of `trainOn`.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def trainOn(data: JavaDStream[Vector]): Unit = trainOn(data.dstream)
/**
@@ -271,8 +270,8 @@ class StreamingKMeans(
*
* @param data DStream containing vector data
* @return DStream containing predictions
- * @since 1.2.0
*/
+ @Since("1.2.0")
def predictOn(data: DStream[Vector]): DStream[Int] = {
assertInitialized()
data.map(model.predict)
@@ -280,8 +279,8 @@ class StreamingKMeans(
/**
* Java-friendly version of `predictOn`.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Integer] = {
JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Integer]])
}
@@ -292,8 +291,8 @@ class StreamingKMeans(
* @param data DStream containing (key, feature vector) pairs
* @tparam K key type
* @return DStream containing the input keys and the predictions as values
- * @since 1.2.0
*/
+ @Since("1.2.0")
def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Int)] = {
assertInitialized()
data.mapValues(model.predict)
@@ -301,8 +300,8 @@ class StreamingKMeans(
/**
* Java-friendly version of `predictOnValues`.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def predictOnValues[K](
data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Integer] = {
implicit val tag = fakeClassTag[K]
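A rough sketch of how the StreamingKMeans setters and train/predict methods annotated above fit together. The DStreams `trainingStream: DStream[Vector]` and `testStream: DStream[(String, Vector)]` are hypothetical and assumed to come from an existing StreamingContext.

    import org.apache.spark.mllib.clustering.StreamingKMeans

    val skm = new StreamingKMeans()
      .setK(3)
      .setHalfLife(5.0, "batches")            // forgetfulness expressed as a half life
      .setRandomCenters(dim = 2, weight = 0.0)
    skm.trainOn(trainingStream)               // update the model on each batch
    skm.predictOnValues(testStream).print()   // (key, cluster index) per record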
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index 486741edd6..76ae847921 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.evaluation
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.Logging
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.evaluation.binary._
@@ -41,8 +41,8 @@ import org.apache.spark.sql.DataFrame
* of bins may not exactly equal numBins. The last bin in each partition may
* be smaller as a result, meaning there may be an extra sample at
* partition boundaries.
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
class BinaryClassificationMetrics(
val scoreAndLabels: RDD[(Double, Double)],
@@ -52,8 +52,8 @@ class BinaryClassificationMetrics(
/**
* Defaults `numBins` to 0.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def this(scoreAndLabels: RDD[(Double, Double)]) = this(scoreAndLabels, 0)
/**
@@ -65,16 +65,16 @@ class BinaryClassificationMetrics(
/**
* Unpersist intermediate RDDs used in the computation.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def unpersist() {
cumulativeCounts.unpersist()
}
/**
* Returns thresholds in descending order.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def thresholds(): RDD[Double] = cumulativeCounts.map(_._1)
/**
@@ -82,8 +82,8 @@ class BinaryClassificationMetrics(
* which is an RDD of (false positive rate, true positive rate)
* with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
* @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
- * @since 1.0.0
*/
+ @Since("1.0.0")
def roc(): RDD[(Double, Double)] = {
val rocCurve = createCurve(FalsePositiveRate, Recall)
val sc = confusions.context
@@ -94,16 +94,16 @@ class BinaryClassificationMetrics(
/**
* Computes the area under the receiver operating characteristic (ROC) curve.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def areaUnderROC(): Double = AreaUnderCurve.of(roc())
/**
* Returns the precision-recall curve, which is an RDD of (recall, precision),
* NOT (precision, recall), with (0.0, 1.0) prepended to it.
* @see http://en.wikipedia.org/wiki/Precision_and_recall
- * @since 1.0.0
*/
+ @Since("1.0.0")
def pr(): RDD[(Double, Double)] = {
val prCurve = createCurve(Recall, Precision)
val sc = confusions.context
@@ -113,8 +113,8 @@ class BinaryClassificationMetrics(
/**
* Computes the area under the precision-recall curve.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def areaUnderPR(): Double = AreaUnderCurve.of(pr())
/**
@@ -122,26 +122,26 @@ class BinaryClassificationMetrics(
* @param beta the beta factor in F-Measure computation.
* @return an RDD of (threshold, F-Measure) pairs.
* @see http://en.wikipedia.org/wiki/F1_score
- * @since 1.0.0
*/
+ @Since("1.0.0")
def fMeasureByThreshold(beta: Double): RDD[(Double, Double)] = createCurve(FMeasure(beta))
/**
* Returns the (threshold, F-Measure) curve with beta = 1.0.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def fMeasureByThreshold(): RDD[(Double, Double)] = fMeasureByThreshold(1.0)
/**
* Returns the (threshold, precision) curve.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def precisionByThreshold(): RDD[(Double, Double)] = createCurve(Precision)
/**
* Returns the (threshold, recall) curve.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def recallByThreshold(): RDD[(Double, Double)] = createCurve(Recall)
private lazy val (
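A minimal sketch of the BinaryClassificationMetrics methods annotated in this file, assuming an existing SparkContext `sc`; the (score, label) pairs are made up.

    import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics

    val scoreAndLabels = sc.parallelize(Seq(
      (0.9, 1.0), (0.8, 1.0), (0.6, 0.0), (0.4, 1.0), (0.1, 0.0)))
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    println(s"AUC-ROC = ${metrics.areaUnderROC()}")
    println(s"AUC-PR  = ${metrics.areaUnderPR()}")
    metrics.roc().collect()   // (false positive rate, true positive rate) pairs
    metrics.unpersist()       // release the cached intermediate RDDs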
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index dddfa3ea5b..02e89d9210 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.evaluation
import scala.collection.Map
import org.apache.spark.SparkContext._
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.linalg.{Matrices, Matrix}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
@@ -30,8 +30,8 @@ import org.apache.spark.sql.DataFrame
* Evaluator for multiclass classification.
*
* @param predictionAndLabels an RDD of (prediction, label) pairs.
- * @since 1.1.0
*/
+@Since("1.1.0")
@Experimental
class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
@@ -65,8 +65,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
* predicted classes are in columns,
* they are ordered by class label ascending,
* as in "labels"
- * @since 1.1.0
*/
+ @Since("1.1.0")
def confusionMatrix: Matrix = {
val n = labels.size
val values = Array.ofDim[Double](n * n)
@@ -85,15 +85,15 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns true positive rate for a given label (category)
* @param label the label.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def truePositiveRate(label: Double): Double = recall(label)
/**
* Returns false positive rate for a given label (category)
* @param label the label.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def falsePositiveRate(label: Double): Double = {
val fp = fpByClass.getOrElse(label, 0)
fp.toDouble / (labelCount - labelCountByClass(label))
@@ -102,8 +102,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns precision for a given label (category)
* @param label the label.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def precision(label: Double): Double = {
val tp = tpByClass(label)
val fp = fpByClass.getOrElse(label, 0)
@@ -113,16 +113,16 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns recall for a given label (category)
* @param label the label.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label)
/**
* Returns f-measure for a given label (category)
* @param label the label.
* @param beta the beta parameter.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def fMeasure(label: Double, beta: Double): Double = {
val p = precision(label)
val r = recall(label)
@@ -133,8 +133,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns f1-measure for a given label (category)
* @param label the label.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def fMeasure(label: Double): Double = fMeasure(label, 1.0)
/**
@@ -187,8 +187,8 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
* Returns weighted averaged f-measure
* @param beta the beta parameter.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category, count) =>
fMeasure(category, beta) * count.toDouble / labelCount
}.sum
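A short sketch of the MulticlassMetrics accessors annotated above, assuming an existing SparkContext `sc`; the (prediction, label) pairs are made up.

    import org.apache.spark.mllib.evaluation.MulticlassMetrics

    val predictionAndLabels = sc.parallelize(Seq(
      (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (2.0, 2.0), (2.0, 1.0)))
    val metrics = new MulticlassMetrics(predictionAndLabels)
    println(metrics.confusionMatrix)
    metrics.labels.foreach { l =>
      println(s"label $l: precision=${metrics.precision(l)} " +
        s"recall=${metrics.recall(l)} f1=${metrics.fMeasure(l)}")
    }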
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
index 77cb1e09bd..a0a8d9c568 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
@@ -17,6 +17,7 @@
package org.apache.spark.mllib.evaluation
+import org.apache.spark.annotation.Since
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.sql.DataFrame
@@ -25,8 +26,8 @@ import org.apache.spark.sql.DataFrame
* Evaluator for multilabel classification.
* @param predictionAndLabels an RDD of (predictions, labels) pairs,
* both are non-null Arrays, each with unique elements.
- * @since 1.2.0
*/
+@Since("1.2.0")
class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
/**
@@ -104,8 +105,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns precision for a given label (category)
* @param label the label.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def precision(label: Double): Double = {
val tp = tpPerClass(label)
val fp = fpPerClass.getOrElse(label, 0L)
@@ -115,8 +116,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns recall for a given label (category)
* @param label the label.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def recall(label: Double): Double = {
val tp = tpPerClass(label)
val fn = fnPerClass.getOrElse(label, 0L)
@@ -126,8 +127,8 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns f1-measure for a given label (category)
* @param label the label.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def f1Measure(label: Double): Double = {
val p = precision(label)
val r = recall(label)
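A small sketch of the per-label MultilabelMetrics methods annotated above, assuming an existing SparkContext `sc`; the (predictions, labels) arrays are made up.

    import org.apache.spark.mllib.evaluation.MultilabelMetrics

    val predictionAndLabels = sc.parallelize(Seq(
      (Array(0.0, 1.0), Array(0.0, 2.0)),
      (Array(0.0, 2.0), Array(0.0, 1.0)),
      (Array.empty[Double], Array(0.0))))
    val metrics = new MultilabelMetrics(predictionAndLabels)
    println(s"precision(0.0) = ${metrics.precision(0.0)}")
    println(s"recall(0.0)    = ${metrics.recall(0.0)}")
    println(s"f1Measure(0.0) = ${metrics.f1Measure(0.0)}")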
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index 063fbed8cd..a7f43f0b11 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -23,7 +23,7 @@ import scala.collection.JavaConverters._
import scala.reflect.ClassTag
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.{JavaSparkContext, JavaRDD}
import org.apache.spark.rdd.RDD
@@ -34,8 +34,8 @@ import org.apache.spark.rdd.RDD
* Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance.
*
* @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs.
- * @since 1.2.0
*/
+@Since("1.2.0")
@Experimental
class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])])
extends Logging with Serializable {
@@ -56,8 +56,8 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
*
* @param k the position to compute the truncated precision, must be positive
* @return the average precision at the first k ranking positions
- * @since 1.2.0
*/
+ @Since("1.2.0")
def precisionAt(k: Int): Double = {
require(k > 0, "ranking position k should be positive")
predictionAndLabels.map { case (pred, lab) =>
@@ -126,8 +126,8 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
*
* @param k the position to compute the truncated ndcg, must be positive
* @return the average ndcg at the first k ranking positions
- * @since 1.2.0
*/
+ @Since("1.2.0")
def ndcgAt(k: Int): Double = {
require(k > 0, "ranking position k should be positive")
predictionAndLabels.map { case (pred, lab) =>
@@ -165,8 +165,8 @@ object RankingMetrics {
/**
* Creates a [[RankingMetrics]] instance (for Java users).
* @param predictionAndLabels a JavaRDD of (predicted ranking, ground truth set) pairs
- * @since 1.4.0
*/
+ @Since("1.4.0")
def of[E, T <: jl.Iterable[E]](predictionAndLabels: JavaRDD[(T, T)]): RankingMetrics[E] = {
implicit val tag = JavaSparkContext.fakeClassTag[E]
val rdd = predictionAndLabels.rdd.map { case (predictions, labels) =>
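A minimal sketch of the RankingMetrics methods annotated above, assuming an existing SparkContext `sc`; the predicted rankings and ground-truth sets are invented.

    import org.apache.spark.mllib.evaluation.RankingMetrics

    val predictionAndLabels = sc.parallelize(Seq(
      (Array(1, 6, 2, 7, 8), Array(1, 2, 3, 4, 5)),
      (Array(4, 1, 5, 6, 2), Array(1, 2, 3))))
    val metrics = new RankingMetrics(predictionAndLabels)
    println(s"precision@3 = ${metrics.precisionAt(3)}")
    println(s"ndcg@3      = ${metrics.ndcgAt(3)}")
    println(s"MAP         = ${metrics.meanAveragePrecision}")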
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 54dfd8c099..36a6c357c3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.evaluation
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.rdd.RDD
import org.apache.spark.Logging
import org.apache.spark.mllib.linalg.Vectors
@@ -29,8 +29,8 @@ import org.apache.spark.sql.DataFrame
* Evaluator for regression.
*
* @param predictionAndObservations an RDD of (prediction, observation) pairs.
- * @since 1.2.0
*/
+@Since("1.2.0")
@Experimental
class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
@@ -67,8 +67,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
* Returns the variance explained by regression.
* explainedVariance = \sum_i (\hat{y_i} - \bar{y})^2 / n
* @see [[https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained]]
- * @since 1.2.0
*/
+ @Since("1.2.0")
def explainedVariance: Double = {
SSreg / summary.count
}
@@ -76,8 +76,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns the mean absolute error, which is a risk function corresponding to the
* expected value of the absolute error loss or l1-norm loss.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def meanAbsoluteError: Double = {
summary.normL1(1) / summary.count
}
@@ -85,8 +85,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns the mean squared error, which is a risk function corresponding to the
* expected value of the squared error loss or quadratic loss.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def meanSquaredError: Double = {
SSerr / summary.count
}
@@ -94,8 +94,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns the root mean squared error, which is defined as the square root of
* the mean squared error.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def rootMeanSquaredError: Double = {
math.sqrt(this.meanSquaredError)
}
@@ -103,8 +103,8 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
/**
* Returns R^2^, the unadjusted coefficient of determination.
* @see [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
- * @since 1.2.0
*/
+ @Since("1.2.0")
def r2: Double = {
1 - SSerr / SStot
}
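A minimal sketch of the RegressionMetrics accessors annotated above, assuming an existing SparkContext `sc`; the (prediction, observation) pairs are made up.

    import org.apache.spark.mllib.evaluation.RegressionMetrics

    val predictionAndObservations = sc.parallelize(Seq(
      (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)))
    val metrics = new RegressionMetrics(predictionAndObservations)
    println(s"MSE  = ${metrics.meanSquaredError}")
    println(s"RMSE = ${metrics.rootMeanSquaredError}")
    println(s"MAE  = ${metrics.meanAbsoluteError}")
    println(s"R^2  = ${metrics.r2}")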
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
index 7f4de77044..ba3b447a83 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
@@ -20,7 +20,7 @@ import scala.collection.JavaConverters._
import scala.reflect.ClassTag
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.mllib.fpm.AssociationRules.Rule
@@ -33,24 +33,22 @@ import org.apache.spark.rdd.RDD
 * Generates association rules from a [[RDD[FreqItemset[Item]]]]. This method only generates
* association rules which have a single item as the consequent.
*
- * @since 1.5.0
*/
+@Since("1.5.0")
@Experimental
class AssociationRules private[fpm] (
private var minConfidence: Double) extends Logging with Serializable {
/**
* Constructs a default instance with default parameters {minConfidence = 0.8}.
- *
- * @since 1.5.0
*/
+ @Since("1.5.0")
def this() = this(0.8)
/**
* Sets the minimal confidence (default: `0.8`).
- *
- * @since 1.5.0
*/
+ @Since("1.5.0")
def setMinConfidence(minConfidence: Double): this.type = {
require(minConfidence >= 0.0 && minConfidence <= 1.0)
this.minConfidence = minConfidence
@@ -62,8 +60,8 @@ class AssociationRules private[fpm] (
* @param freqItemsets frequent itemset model obtained from [[FPGrowth]]
 * @return a [[Set[Rule[Item]]]] containing the association rules.
*
- * @since 1.5.0
*/
+ @Since("1.5.0")
def run[Item: ClassTag](freqItemsets: RDD[FreqItemset[Item]]): RDD[Rule[Item]] = {
// For candidate rule X => Y, generate (X, (Y, freq(X union Y)))
val candidates = freqItemsets.flatMap { itemset =>
@@ -102,8 +100,8 @@ object AssociationRules {
* instead.
* @tparam Item item type
*
- * @since 1.5.0
*/
+ @Since("1.5.0")
@Experimental
class Rule[Item] private[fpm] (
val antecedent: Array[Item],
@@ -114,8 +112,8 @@ object AssociationRules {
/**
* Returns the confidence of the rule.
*
- * @since 1.5.0
*/
+ @Since("1.5.0")
def confidence: Double = freqUnion.toDouble / freqAntecedent
require(antecedent.toSet.intersect(consequent.toSet).isEmpty, {
@@ -127,8 +125,8 @@ object AssociationRules {
/**
* Returns antecedent in a Java List.
*
- * @since 1.5.0
*/
+ @Since("1.5.0")
def javaAntecedent: java.util.List[Item] = {
antecedent.toList.asJava
}
@@ -136,8 +134,8 @@ object AssociationRules {
/**
* Returns consequent in a Java List.
*
- * @since 1.5.0
*/
+ @Since("1.5.0")
def javaConsequent: java.util.List[Item] = {
consequent.toList.asJava
}
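A minimal sketch of running AssociationRules directly on frequent itemsets, as the methods annotated above allow; an existing SparkContext `sc` is assumed and the itemsets are made up.

    import org.apache.spark.mllib.fpm.AssociationRules
    import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset

    val freqItemsets = sc.parallelize(Seq(
      new FreqItemset(Array("a"), 15L),
      new FreqItemset(Array("b"), 35L),
      new FreqItemset(Array("a", "b"), 12L)))
    val ar = new AssociationRules().setMinConfidence(0.8)
    ar.run(freqItemsets).collect().foreach { rule =>
      println(s"${rule.antecedent.mkString(",")} => " +
        s"${rule.consequent.mkString(",")}: ${rule.confidence}")
    }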
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
index e2370a52f4..e37f806271 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
@@ -25,7 +25,7 @@ import scala.collection.JavaConverters._
import scala.reflect.ClassTag
import org.apache.spark.{HashPartitioner, Logging, Partitioner, SparkException}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.mllib.fpm.FPGrowth._
@@ -39,15 +39,15 @@ import org.apache.spark.storage.StorageLevel
* @param freqItemsets frequent itemset, which is an RDD of [[FreqItemset]]
* @tparam Item item type
*
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable {
/**
* Generates association rules for the [[Item]]s in [[freqItemsets]].
* @param confidence minimal confidence of the rules produced
- * @since 1.5.0
*/
+ @Since("1.5.0")
def generateAssociationRules(confidence: Double): RDD[AssociationRules.Rule[Item]] = {
val associationRules = new AssociationRules(confidence)
associationRules.run(freqItemsets)
@@ -71,8 +71,8 @@ class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) ex
* @see [[http://en.wikipedia.org/wiki/Association_rule_learning Association rule learning
* (Wikipedia)]]
*
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
class FPGrowth private (
private var minSupport: Double,
@@ -82,15 +82,15 @@ class FPGrowth private (
* Constructs a default instance with default parameters {minSupport: `0.3`, numPartitions: same
* as the input data}.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def this() = this(0.3, -1)
/**
* Sets the minimal support level (default: `0.3`).
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setMinSupport(minSupport: Double): this.type = {
this.minSupport = minSupport
this
@@ -99,8 +99,8 @@ class FPGrowth private (
/**
* Sets the number of partitions used by parallel FP-growth (default: same as input data).
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def setNumPartitions(numPartitions: Int): this.type = {
this.numPartitions = numPartitions
this
@@ -111,8 +111,8 @@ class FPGrowth private (
* @param data input data set, each element contains a transaction
* @return an [[FPGrowthModel]]
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def run[Item: ClassTag](data: RDD[Array[Item]]): FPGrowthModel[Item] = {
if (data.getStorageLevel == StorageLevel.NONE) {
logWarning("Input data is not cached.")
@@ -213,8 +213,8 @@ class FPGrowth private (
/**
* :: Experimental ::
*
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
object FPGrowth {
@@ -224,15 +224,15 @@ object FPGrowth {
* @param freq frequency
* @tparam Item item type
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
class FreqItemset[Item](val items: Array[Item], val freq: Long) extends Serializable {
/**
* Returns items in a Java List.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def javaItems: java.util.List[Item] = {
items.toList.asJava
}
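A minimal end-to-end sketch of the FPGrowth API annotated above, including rule generation through the FPGrowthModel; an existing SparkContext `sc` is assumed and the transactions are made up.

    import org.apache.spark.mllib.fpm.FPGrowth

    val transactions = sc.parallelize(Seq(
      Array("a", "b", "c"),
      Array("a", "b", "d"),
      Array("b", "c"),
      Array("a", "c")))
    val model = new FPGrowth()
      .setMinSupport(0.5)
      .setNumPartitions(2)
      .run(transactions)
    model.freqItemsets.collect().foreach { itemset =>
      println(s"${itemset.items.mkString("[", ",", "]")}: ${itemset.freq}")
    }
    // Rules with at least 80% confidence, via generateAssociationRules.
    model.generateAssociationRules(0.8).collect().foreach { rule =>
      println(s"${rule.antecedent.mkString(",")} => ${rule.consequent.mkString(",")}")
    }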
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index dfa8910fcb..28b5b4637b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuilder => MArrayBuilder, HashSet => MHash
import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM}
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types._
@@ -227,8 +227,8 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
* @param values matrix entries in column major if not transposed or in row major otherwise
* @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in
* row major.
- * @since 1.0.0
*/
+@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class DenseMatrix(
val numRows: Int,
@@ -253,8 +253,8 @@ class DenseMatrix(
* @param numRows number of rows
* @param numCols number of columns
* @param values matrix entries in column major
- * @since 1.3.0
*/
+ @Since("1.3.0")
def this(numRows: Int, numCols: Int, values: Array[Double]) =
this(numRows, numCols, values, false)
@@ -278,9 +278,7 @@ class DenseMatrix(
private[mllib] def apply(i: Int): Double = values(i)
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def apply(i: Int, j: Int): Double = values(index(i, j))
private[mllib] def index(i: Int, j: Int): Int = {
@@ -291,9 +289,7 @@ class DenseMatrix(
values(index(i, j)) = v
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone())
private[spark] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f),
@@ -309,9 +305,7 @@ class DenseMatrix(
this
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed)
private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
@@ -342,21 +336,17 @@ class DenseMatrix(
}
}
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def numNonzeros: Int = values.count(_ != 0)
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def numActives: Int = values.length
/**
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
* set to false.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def toSparse: SparseMatrix = {
val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
val colPtrs: Array[Int] = new Array[Int](numCols + 1)
@@ -383,8 +373,8 @@ class DenseMatrix(
/**
* Factory methods for [[org.apache.spark.mllib.linalg.DenseMatrix]].
- * @since 1.3.0
*/
+@Since("1.3.0")
object DenseMatrix {
/**
@@ -392,8 +382,8 @@ object DenseMatrix {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros
- * @since 1.3.0
*/
+ @Since("1.3.0")
def zeros(numRows: Int, numCols: Int): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
s"$numRows x $numCols dense matrix is too large to allocate")
@@ -405,8 +395,8 @@ object DenseMatrix {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `DenseMatrix` with size `numRows` x `numCols` and values of ones
- * @since 1.3.0
*/
+ @Since("1.3.0")
def ones(numRows: Int, numCols: Int): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
s"$numRows x $numCols dense matrix is too large to allocate")
@@ -417,8 +407,8 @@ object DenseMatrix {
* Generate an Identity Matrix in `DenseMatrix` format.
* @param n number of rows and columns of the matrix
* @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal
- * @since 1.3.0
*/
+ @Since("1.3.0")
def eye(n: Int): DenseMatrix = {
val identity = DenseMatrix.zeros(n, n)
var i = 0
@@ -435,8 +425,8 @@ object DenseMatrix {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
s"$numRows x $numCols dense matrix is too large to allocate")
@@ -449,8 +439,8 @@ object DenseMatrix {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
s"$numRows x $numCols dense matrix is too large to allocate")
@@ -462,8 +452,8 @@ object DenseMatrix {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `DenseMatrix` with size `values.length` x `values.length` and `values`
* on the diagonal
- * @since 1.3.0
*/
+ @Since("1.3.0")
def diag(vector: Vector): DenseMatrix = {
val n = vector.size
val matrix = DenseMatrix.zeros(n, n)
@@ -498,8 +488,8 @@ object DenseMatrix {
* @param isTransposed whether the matrix is transposed. If true, the matrix can be considered
* Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs,
* and `rowIndices` behave as colIndices, and `values` are stored in row major.
- * @since 1.2.0
*/
+@Since("1.2.0")
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class SparseMatrix(
val numRows: Int,
@@ -536,8 +526,8 @@ class SparseMatrix(
* @param rowIndices the row index of the entry. They must be in strictly increasing
* order for each column
* @param values non-zero matrix entries in column major
- * @since 1.3.0
*/
+ @Since("1.3.0")
def this(
numRows: Int,
numCols: Int,
@@ -560,8 +550,8 @@ class SparseMatrix(
}
/**
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def apply(i: Int, j: Int): Double = {
val ind = index(i, j)
if (ind < 0) 0.0 else values(ind)
@@ -585,9 +575,7 @@ class SparseMatrix(
}
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def copy: SparseMatrix = {
new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone())
}
@@ -605,9 +593,7 @@ class SparseMatrix(
this
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def transpose: SparseMatrix =
new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed)
@@ -641,28 +627,24 @@ class SparseMatrix(
/**
* Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
* set to false.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def toDense: DenseMatrix = {
new DenseMatrix(numRows, numCols, toArray)
}
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def numNonzeros: Int = values.count(_ != 0)
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def numActives: Int = values.length
}
/**
* Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]].
- * @since 1.3.0
*/
+@Since("1.3.0")
object SparseMatrix {
/**
@@ -673,8 +655,8 @@ object SparseMatrix {
* @param numCols number of columns of the matrix
* @param entries Array of (i, j, value) tuples
* @return The corresponding `SparseMatrix`
- * @since 1.3.0
*/
+ @Since("1.3.0")
def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = {
val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1))
val numEntries = sortedEntries.size
@@ -722,8 +704,8 @@ object SparseMatrix {
* Generate an Identity Matrix in `SparseMatrix` format.
* @param n number of rows and columns of the matrix
* @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal
- * @since 1.3.0
*/
+ @Since("1.3.0")
def speye(n: Int): SparseMatrix = {
new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0))
}
@@ -792,8 +774,8 @@ object SparseMatrix {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
val mat = genRandMatrix(numRows, numCols, density, rng)
mat.update(i => rng.nextDouble())
@@ -806,8 +788,8 @@ object SparseMatrix {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
val mat = genRandMatrix(numRows, numCols, density, rng)
mat.update(i => rng.nextGaussian())
@@ -818,8 +800,8 @@ object SparseMatrix {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero
* `values` on the diagonal
- * @since 1.3.0
*/
+ @Since("1.3.0")
def spdiag(vector: Vector): SparseMatrix = {
val n = vector.size
vector match {
@@ -835,8 +817,8 @@ object SparseMatrix {
/**
* Factory methods for [[org.apache.spark.mllib.linalg.Matrix]].
- * @since 1.0.0
*/
+@Since("1.0.0")
object Matrices {
/**
@@ -845,8 +827,8 @@ object Matrices {
* @param numRows number of rows
* @param numCols number of columns
* @param values matrix entries in column major
- * @since 1.0.0
*/
+ @Since("1.0.0")
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = {
new DenseMatrix(numRows, numCols, values)
}
@@ -859,8 +841,8 @@ object Matrices {
* @param colPtrs the index corresponding to the start of a new column
* @param rowIndices the row index of the entry
* @param values non-zero matrix entries in column major
- * @since 1.2.0
*/
+ @Since("1.2.0")
def sparse(
numRows: Int,
numCols: Int,
@@ -893,8 +875,8 @@ object Matrices {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `Matrix` with size `numRows` x `numCols` and values of zeros
- * @since 1.2.0
*/
+ @Since("1.2.0")
def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols)
/**
@@ -902,24 +884,24 @@ object Matrices {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `Matrix` with size `numRows` x `numCols` and values of ones
- * @since 1.2.0
*/
+ @Since("1.2.0")
def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols)
/**
* Generate a dense Identity Matrix in `Matrix` format.
* @param n number of rows and columns of the matrix
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
- * @since 1.2.0
*/
+ @Since("1.2.0")
def eye(n: Int): Matrix = DenseMatrix.eye(n)
/**
* Generate a sparse Identity Matrix in `Matrix` format.
* @param n number of rows and columns of the matrix
* @return `Matrix` with size `n` x `n` and values of ones on the diagonal
- * @since 1.3.0
*/
+ @Since("1.3.0")
def speye(n: Int): Matrix = SparseMatrix.speye(n)
/**
@@ -928,8 +910,8 @@ object Matrices {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
- * @since 1.2.0
*/
+ @Since("1.2.0")
def rand(numRows: Int, numCols: Int, rng: Random): Matrix =
DenseMatrix.rand(numRows, numCols, rng)
@@ -940,8 +922,8 @@ object Matrices {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
SparseMatrix.sprand(numRows, numCols, density, rng)
@@ -951,8 +933,8 @@ object Matrices {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
- * @since 1.2.0
*/
+ @Since("1.2.0")
def randn(numRows: Int, numCols: Int, rng: Random): Matrix =
DenseMatrix.randn(numRows, numCols, rng)
@@ -963,8 +945,8 @@ object Matrices {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
- * @since 1.3.0
*/
+ @Since("1.3.0")
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
SparseMatrix.sprandn(numRows, numCols, density, rng)
@@ -973,8 +955,8 @@ object Matrices {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `Matrix` with size `values.length` x `values.length` and `values`
* on the diagonal
- * @since 1.2.0
*/
+ @Since("1.2.0")
def diag(vector: Vector): Matrix = DenseMatrix.diag(vector)
/**
@@ -983,8 +965,8 @@ object Matrices {
* a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were horizontally concatenated
- * @since 1.3.0
*/
+ @Since("1.3.0")
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
return new DenseMatrix(0, 0, Array[Double]())
@@ -1042,8 +1024,8 @@ object Matrices {
* a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were vertically concatenated
- * @since 1.3.0
*/
+ @Since("1.3.0")
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
return new DenseMatrix(0, 0, Array[Double]())
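A short sketch of the local-matrix factory methods annotated in this file; values are made up and chosen only to show the column-major and CSC layouts described above.

    import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices}

    // 3 x 2 dense matrix, values given in column-major order.
    val dm = new DenseMatrix(3, 2, Array(1.0, 0.0, 5.0, 0.0, 4.0, 0.0))
    // 3 x 2 sparse matrix in CSC form: colPtrs, rowIndices (increasing per column), values.
    val sm = Matrices.sparse(3, 2, Array(0, 1, 3), Array(0, 1, 2), Array(9.0, 6.0, 8.0))
    val identity = Matrices.eye(3)
    // Conversion between the two storage formats, as annotated above.
    val dmAsSparse = dm.toSparse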
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
index 8f504f6984..a37aca99d5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
@@ -17,13 +17,13 @@
package org.apache.spark.mllib.linalg
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
/**
* :: Experimental ::
* Represents singular value decomposition (SVD) factors.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType)
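For context, a sketch of where SingularValueDecomposition typically comes from: RowMatrix.computeSVD. An existing SparkContext `sc` is assumed and the rows are made up.

    import org.apache.spark.mllib.linalg.{Matrix, SingularValueDecomposition, Vectors}
    import org.apache.spark.mllib.linalg.distributed.RowMatrix

    val rows = sc.parallelize(Seq(
      Vectors.dense(1.0, 0.0, 0.0),
      Vectors.dense(0.0, 2.0, 0.0),
      Vectors.dense(0.0, 0.0, 3.0)))
    val mat = new RowMatrix(rows)
    val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(2, computeU = true)
    println(svd.s)    // singular values as a local Vector
    val u = svd.U     // distributed RowMatrix of left singular vectors
    val v = svd.V     // local Matrix of right singular vectors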
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 52ef7be3b3..3d577edbe2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -26,7 +26,7 @@ import scala.collection.JavaConverters._
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
import org.apache.spark.SparkException
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.{AlphaComponent, Since}
import org.apache.spark.mllib.util.NumericParser
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
@@ -240,14 +240,14 @@ class VectorUDT extends UserDefinedType[Vector] {
* Factory methods for [[org.apache.spark.mllib.linalg.Vector]].
* We don't use the name `Vector` because Scala imports
* [[scala.collection.immutable.Vector]] by default.
- * @since 1.0.0
*/
+@Since("1.0.0")
object Vectors {
/**
* Creates a dense vector from its values.
- * @since 1.0.0
*/
+ @Since("1.0.0")
@varargs
def dense(firstValue: Double, otherValues: Double*): Vector =
new DenseVector((firstValue +: otherValues).toArray)
@@ -255,8 +255,8 @@ object Vectors {
// A dummy implicit is used to avoid signature collision with the one generated by @varargs.
/**
* Creates a dense vector from a double array.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def dense(values: Array[Double]): Vector = new DenseVector(values)
/**
@@ -265,8 +265,8 @@ object Vectors {
* @param size vector size.
* @param indices index array, must be strictly increasing.
* @param values value array, must have the same length as indices.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector =
new SparseVector(size, indices, values)
@@ -275,8 +275,8 @@ object Vectors {
*
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
require(size > 0, "The size of the requested sparse vector must be greater than 0.")
@@ -297,8 +297,8 @@ object Vectors {
*
* @param size vector size.
* @param elements vector elements in (index, value) pairs.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
sparse(size, elements.asScala.map { case (i, x) =>
(i.intValue(), x.doubleValue())
@@ -310,16 +310,16 @@ object Vectors {
*
* @param size vector size
* @return a zero vector
- * @since 1.1.0
*/
+ @Since("1.1.0")
def zeros(size: Int): Vector = {
new DenseVector(new Array[Double](size))
}
/**
* Parses a string resulted from [[Vector.toString]] into a [[Vector]].
- * @since 1.1.0
*/
+ @Since("1.1.0")
def parse(s: String): Vector = {
parseNumeric(NumericParser.parse(s))
}
@@ -362,8 +362,8 @@ object Vectors {
* @param vector input vector.
* @param p norm.
* @return norm in L^p^ space.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def norm(vector: Vector, p: Double): Double = {
require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
s"You specified p=$p.")
@@ -415,8 +415,8 @@ object Vectors {
* @param v1 first Vector.
* @param v2 second Vector.
* @return squared distance between two Vectors.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def sqdist(v1: Vector, v2: Vector): Double = {
require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
s"=${v2.size}.")
@@ -529,33 +529,25 @@ object Vectors {
/**
* A dense vector represented by a value array.
- * @since 1.0.0
*/
+@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
class DenseVector(val values: Array[Double]) extends Vector {
- /**
- * @since 1.0.0
- */
+ @Since("1.0.0")
override def size: Int = values.length
override def toString: String = values.mkString("[", ",", "]")
- /**
- * @since 1.0.0
- */
+ @Since("1.0.0")
override def toArray: Array[Double] = values
private[spark] override def toBreeze: BV[Double] = new BDV[Double](values)
- /**
- * @since 1.0.0
- */
+ @Since("1.0.0")
override def apply(i: Int): Double = values(i)
- /**
- * @since 1.1.0
- */
+ @Since("1.1.0")
override def copy: DenseVector = {
new DenseVector(values.clone())
}
@@ -587,14 +579,10 @@ class DenseVector(val values: Array[Double]) extends Vector {
result
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def numActives: Int = size
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def numNonzeros: Int = {
// same as values.count(_ != 0.0) but faster
var nnz = 0
@@ -606,9 +594,7 @@ class DenseVector(val values: Array[Double]) extends Vector {
nnz
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def toSparse: SparseVector = {
val nnz = numNonzeros
val ii = new Array[Int](nnz)
@@ -624,9 +610,7 @@ class DenseVector(val values: Array[Double]) extends Vector {
new SparseVector(size, ii, vv)
}
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def argmax: Int = {
if (size == 0) {
-1
@@ -646,9 +630,7 @@ class DenseVector(val values: Array[Double]) extends Vector {
}
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object DenseVector {
/** Extracts the value array from a dense vector. */
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
@@ -660,8 +642,8 @@ object DenseVector {
* @param size size of the vector.
 * @param indices index array, assumed to be strictly increasing.
* @param values value array, must have the same length as the index array.
- * @since 1.0.0
*/
+@Since("1.0.0")
@SQLUserDefinedType(udt = classOf[VectorUDT])
class SparseVector(
override val size: Int,
@@ -677,9 +659,7 @@ class SparseVector(
override def toString: String =
s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})"
- /**
- * @since 1.0.0
- */
+ @Since("1.0.0")
override def toArray: Array[Double] = {
val data = new Array[Double](size)
var i = 0
@@ -691,9 +671,7 @@ class SparseVector(
data
}
- /**
- * @since 1.1.0
- */
+ @Since("1.1.0")
override def copy: SparseVector = {
new SparseVector(size, indices.clone(), values.clone())
}
@@ -734,14 +712,10 @@ class SparseVector(
result
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def numActives: Int = values.length
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def numNonzeros: Int = {
var nnz = 0
values.foreach { v =>
@@ -752,9 +726,7 @@ class SparseVector(
nnz
}
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def toSparse: SparseVector = {
val nnz = numNonzeros
if (nnz == numActives) {
@@ -774,9 +746,7 @@ class SparseVector(
}
}
- /**
- * @since 1.5.0
- */
+ @Since("1.5.0")
override def argmax: Int = {
if (size == 0) {
-1
@@ -847,9 +817,7 @@ class SparseVector(
}
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object SparseVector {
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
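A minimal sketch of the Vectors factory and utility methods annotated in this file; no cluster is needed since these are local data structures.

    import org.apache.spark.mllib.linalg.Vectors

    val dv = Vectors.dense(1.0, 0.0, 3.0)
    // Same vector as (size, indices, values), indices strictly increasing.
    val sv = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
    println(Vectors.norm(dv, 2.0))    // L2 norm
    println(Vectors.sqdist(dv, sv))   // squared distance, 0.0 here
    println(dv.toSparse.numActives)   // 2 stored entries after conversion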
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index cfb6680a18..94376c24a7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer
import breeze.linalg.{DenseMatrix => BDM}
import org.apache.spark.{Logging, Partitioner, SparkException}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Matrix, SparseMatrix}
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
@@ -128,9 +128,8 @@ private[mllib] object GridPartitioner {
* the number of rows will be calculated when `numRows` is invoked.
* @param nCols Number of columns of this matrix. If the supplied value is less than or equal to
* zero, the number of columns will be calculated when `numCols` is invoked.
- * @since 1.3.0
- *
*/
+@Since("1.3.0")
@Experimental
class BlockMatrix(
val blocks: RDD[((Int, Int), Matrix)],
@@ -151,10 +150,8 @@ class BlockMatrix(
* rows are not required to have the given number of rows
* @param colsPerBlock Number of columns that make up each block. The blocks forming the final
* columns are not required to have the given number of columns
- *
- * @since 1.3.0
- *
*/
+ @Since("1.3.0")
def this(
blocks: RDD[((Int, Int), Matrix)],
rowsPerBlock: Int,
@@ -162,20 +159,13 @@ class BlockMatrix(
this(blocks, rowsPerBlock, colsPerBlock, 0L, 0L)
}
- /**
- * @since 1.3.0
- * */
-
+ @Since("1.3.0")
override def numRows(): Long = {
if (nRows <= 0L) estimateDim()
nRows
}
- /**
- *
- * @since 1.3.0
- */
-
+ @Since("1.3.0")
override def numCols(): Long = {
if (nCols <= 0L) estimateDim()
nCols
@@ -206,8 +196,8 @@ class BlockMatrix(
/**
* Validates the block matrix info against the matrix data (`blocks`) and throws an exception if
* any error is found.
- * @since 1.3.0
*/
+ @Since("1.3.0")
def validate(): Unit = {
logDebug("Validating BlockMatrix...")
// check if the matrix is larger than the claimed dimensions
@@ -243,25 +233,22 @@ class BlockMatrix(
logDebug("BlockMatrix is valid!")
}
- /** Caches the underlying RDD.
- * @since 1.3.0
- * */
+ /** Caches the underlying RDD. */
+ @Since("1.3.0")
def cache(): this.type = {
blocks.cache()
this
}
- /** Persists the underlying RDD with the specified storage level.
- * @since 1.3.0
- * */
+ /** Persists the underlying RDD with the specified storage level. */
+ @Since("1.3.0")
def persist(storageLevel: StorageLevel): this.type = {
blocks.persist(storageLevel)
this
}
- /** Converts to CoordinateMatrix.
- * @since 1.3.0
- * */
+ /** Converts to CoordinateMatrix. */
+ @Since("1.3.0")
def toCoordinateMatrix(): CoordinateMatrix = {
val entryRDD = blocks.flatMap { case ((blockRowIndex, blockColIndex), mat) =>
val rowStart = blockRowIndex.toLong * rowsPerBlock
@@ -275,9 +262,8 @@ class BlockMatrix(
new CoordinateMatrix(entryRDD, numRows(), numCols())
}
- /** Converts to IndexedRowMatrix. The number of columns must be within the integer range.
- * @since 1.3.0
- * */
+ /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
+ @Since("1.3.0")
def toIndexedRowMatrix(): IndexedRowMatrix = {
require(numCols() < Int.MaxValue, "The number of columns must be within the integer range. " +
s"numCols: ${numCols()}")
@@ -285,9 +271,8 @@ class BlockMatrix(
toCoordinateMatrix().toIndexedRowMatrix()
}
- /** Collect the distributed matrix on the driver as a `DenseMatrix`.
- * @since 1.3.0
- * */
+ /** Collect the distributed matrix on the driver as a `DenseMatrix`. */
+ @Since("1.3.0")
def toLocalMatrix(): Matrix = {
require(numRows() < Int.MaxValue, "The number of rows of this matrix should be less than " +
s"Int.MaxValue. Currently numRows: ${numRows()}")
@@ -312,11 +297,11 @@ class BlockMatrix(
new DenseMatrix(m, n, values)
}
- /** Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the
- * same underlying data. Is a lazy operation.
- * @since 1.3.0
- *
- * */
+ /**
+ * Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the
+ * same underlying data. Is a lazy operation.
+ */
+ @Since("1.3.0")
def transpose: BlockMatrix = {
val transposedBlocks = blocks.map { case ((blockRowIndex, blockColIndex), mat) =>
((blockColIndex, blockRowIndex), mat.transpose)
@@ -330,13 +315,14 @@ class BlockMatrix(
new BDM[Double](localMat.numRows, localMat.numCols, localMat.toArray)
}
- /** Adds two block matrices together. The matrices must have the same size and matching
- * `rowsPerBlock` and `colsPerBlock` values. If one of the blocks that are being added are
- * instances of [[SparseMatrix]], the resulting sub matrix will also be a [[SparseMatrix]], even
- * if it is being added to a [[DenseMatrix]]. If two dense matrices are added, the output will
- * also be a [[DenseMatrix]].
- * @since 1.3.0
- */
+ /**
+ * Adds two block matrices together. The matrices must have the same size and matching
+ * `rowsPerBlock` and `colsPerBlock` values. If one of the blocks that are being added are
+ * instances of [[SparseMatrix]], the resulting sub matrix will also be a [[SparseMatrix]], even
+ * if it is being added to a [[DenseMatrix]]. If two dense matrices are added, the output will
+ * also be a [[DenseMatrix]].
+ */
+ @Since("1.3.0")
def add(other: BlockMatrix): BlockMatrix = {
require(numRows() == other.numRows(), "Both matrices must have the same number of rows. " +
s"A.numRows: ${numRows()}, B.numRows: ${other.numRows()}")
@@ -364,14 +350,14 @@ class BlockMatrix(
}
}
- /** Left multiplies this [[BlockMatrix]] to `other`, another [[BlockMatrix]]. The `colsPerBlock`
- * of this matrix must equal the `rowsPerBlock` of `other`. If `other` contains
- * [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output
- * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause
- * some performance issues until support for multiplying two sparse matrices is added.
- *
- * @since 1.3.0
- */
+ /**
+ * Left multiplies this [[BlockMatrix]] to `other`, another [[BlockMatrix]]. The `colsPerBlock`
+ * of this matrix must equal the `rowsPerBlock` of `other`. If `other` contains
+ * [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output
+ * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause
+ * some performance issues until support for multiplying two sparse matrices is added.
+ */
+ @Since("1.3.0")
def multiply(other: BlockMatrix): BlockMatrix = {
require(numCols() == other.numRows(), "The number of columns of A and the number of rows " +
s"of B must be equal. A.numCols: ${numCols()}, B.numRows: ${other.numRows()}. If you " +
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 2b751e45dd..4bb27ec840 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.linalg.distributed
import breeze.linalg.{DenseMatrix => BDM}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors}
@@ -29,8 +29,8 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors}
* @param i row index
* @param j column index
* @param value value of the entry
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
case class MatrixEntry(i: Long, j: Long, value: Double)
@@ -43,22 +43,20 @@ case class MatrixEntry(i: Long, j: Long, value: Double)
* be determined by the max row index plus one.
* @param nCols number of columns. A non-positive value means unknown, and then the number of
* columns will be determined by the max column index plus one.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
class CoordinateMatrix(
val entries: RDD[MatrixEntry],
private var nRows: Long,
private var nCols: Long) extends DistributedMatrix {
- /** Alternative constructor leaving matrix dimensions to be determined automatically.
- * @since 1.0.0
- * */
+ /** Alternative constructor leaving matrix dimensions to be determined automatically. */
+ @Since("1.0.0")
def this(entries: RDD[MatrixEntry]) = this(entries, 0L, 0L)
- /** Gets or computes the number of columns.
- * @since 1.0.0
- * */
+ /** Gets or computes the number of columns. */
+ @Since("1.0.0")
override def numCols(): Long = {
if (nCols <= 0L) {
computeSize()
@@ -66,9 +64,8 @@ class CoordinateMatrix(
nCols
}
- /** Gets or computes the number of rows.
- * @since 1.0.0
- * */
+ /** Gets or computes the number of rows. */
+ @Since("1.0.0")
override def numRows(): Long = {
if (nRows <= 0L) {
computeSize()
@@ -76,16 +73,14 @@ class CoordinateMatrix(
nRows
}
- /** Transposes this CoordinateMatrix.
- * @since 1.3.0
- * */
+ /** Transposes this CoordinateMatrix. */
+ @Since("1.3.0")
def transpose(): CoordinateMatrix = {
new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows())
}
- /** Converts to IndexedRowMatrix. The number of columns must be within the integer range.
- * @since 1.0.0
- * */
+ /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
+ @Since("1.0.0")
def toIndexedRowMatrix(): IndexedRowMatrix = {
val nl = numCols()
if (nl > Int.MaxValue) {
@@ -104,15 +99,14 @@ class CoordinateMatrix(
/**
* Converts to RowMatrix, dropping row indices after grouping by row index.
* The number of columns must be within the integer range.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def toRowMatrix(): RowMatrix = {
toIndexedRowMatrix().toRowMatrix()
}
- /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024.
- * @since 1.3.0
- * */
+ /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */
+ @Since("1.3.0")
def toBlockMatrix(): BlockMatrix = {
toBlockMatrix(1024, 1024)
}
@@ -124,8 +118,8 @@ class CoordinateMatrix(
* @param colsPerBlock The number of columns of each block. The blocks at the right edge may have
* a smaller value. Must be an integer value greater than 0.
* @return a [[BlockMatrix]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = {
require(rowsPerBlock > 0,
s"rowsPerBlock needs to be greater than 0. rowsPerBlock: $rowsPerBlock")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
index 98e90af84a..e51327ebb7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
@@ -19,10 +19,12 @@ package org.apache.spark.mllib.linalg.distributed
import breeze.linalg.{DenseMatrix => BDM}
+import org.apache.spark.annotation.Since
+
/**
* Represents a distributively stored matrix backed by one or more RDDs.
- * @since 1.0.0
*/
+@Since("1.0.0")
trait DistributedMatrix extends Serializable {
/** Gets or computes the number of rows. */
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index a09f88ce28..6d2c05a47d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.linalg.distributed
import breeze.linalg.{DenseMatrix => BDM}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.linalg.SingularValueDecomposition
@@ -27,8 +27,8 @@ import org.apache.spark.mllib.linalg.SingularValueDecomposition
/**
* :: Experimental ::
* Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]].
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
case class IndexedRow(index: Long, vector: Vector)
@@ -42,23 +42,19 @@ case class IndexedRow(index: Long, vector: Vector)
* be determined by the max row index plus one.
* @param nCols number of columns. A non-positive value means unknown, and then the number of
* columns will be determined by the size of the first row.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
class IndexedRowMatrix(
val rows: RDD[IndexedRow],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix {
- /** Alternative constructor leaving matrix dimensions to be determined automatically.
- * @since 1.0.0
- * */
+ /** Alternative constructor leaving matrix dimensions to be determined automatically. */
+ @Since("1.0.0")
def this(rows: RDD[IndexedRow]) = this(rows, 0L, 0)
- /**
- *
- * @since 1.0.0
- */
+ @Since("1.0.0")
override def numCols(): Long = {
if (nCols <= 0) {
// Calling `first` will throw an exception if `rows` is empty.
@@ -67,10 +63,7 @@ class IndexedRowMatrix(
nCols
}
- /**
- *
- * @since 1.0.0
- */
+ @Since("1.0.0")
override def numRows(): Long = {
if (nRows <= 0L) {
// Reduce will throw an exception if `rows` is empty.
@@ -82,15 +75,14 @@ class IndexedRowMatrix(
/**
* Drops row indices and converts this matrix to a
* [[org.apache.spark.mllib.linalg.distributed.RowMatrix]].
- * @since 1.0.0
*/
+ @Since("1.0.0")
def toRowMatrix(): RowMatrix = {
new RowMatrix(rows.map(_.vector), 0L, nCols)
}
- /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024.
- * @since 1.3.0
- * */
+ /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */
+ @Since("1.3.0")
def toBlockMatrix(): BlockMatrix = {
toBlockMatrix(1024, 1024)
}
@@ -102,8 +94,8 @@ class IndexedRowMatrix(
* @param colsPerBlock The number of columns of each block. The blocks at the right edge may have
* a smaller value. Must be an integer value greater than 0.
* @return a [[BlockMatrix]]
- * @since 1.3.0
*/
+ @Since("1.3.0")
def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = {
// TODO: This implementation may be optimized
toCoordinateMatrix().toBlockMatrix(rowsPerBlock, colsPerBlock)
@@ -112,8 +104,8 @@ class IndexedRowMatrix(
/**
* Converts this matrix to a
* [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]].
- * @since 1.3.0
*/
+ @Since("1.3.0")
def toCoordinateMatrix(): CoordinateMatrix = {
val entries = rows.flatMap { row =>
val rowIndex = row.index
@@ -149,8 +141,8 @@ class IndexedRowMatrix(
* @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0)
* are treated as zero, where sigma(0) is the largest singular value.
* @return SingularValueDecomposition(U, s, V)
- * @since 1.0.0
*/
+ @Since("1.0.0")
def computeSVD(
k: Int,
computeU: Boolean = false,
@@ -176,8 +168,8 @@ class IndexedRowMatrix(
*
* @param B a local matrix whose number of rows must match the number of columns of this matrix
* @return an IndexedRowMatrix representing the product, which preserves partitioning
- * @since 1.0.0
*/
+ @Since("1.0.0")
def multiply(B: Matrix): IndexedRowMatrix = {
val mat = toRowMatrix().multiply(B)
val indexedRows = rows.map(_.index).zip(mat.rows).map { case (i, v) =>
@@ -188,8 +180,8 @@ class IndexedRowMatrix(
/**
* Computes the Gramian matrix `A^T A`.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def computeGramianMatrix(): Matrix = {
toRowMatrix().computeGramianMatrix()
}
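The `IndexedRowMatrix` methods annotated above can be exercised as in the sketch below, assuming an existing SparkContext `sc`; the row indices and values are illustrative:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix}

val rows = sc.parallelize(Seq(
  IndexedRow(0L, Vectors.dense(1.0, 0.0, 2.0)),
  IndexedRow(2L, Vectors.dense(0.0, 3.0, 1.0))))
val mat = new IndexedRowMatrix(rows)

val svd = mat.computeSVD(2, computeU = true)   // truncated SVD, keeping U
val projected = mat.multiply(svd.V)            // project rows onto the top-2 right singular vectors
val rowMat = mat.toRowMatrix()                 // drop the row indices
}}}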
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index b2e94f2dd6..78036eba5c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -28,7 +28,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.spark.Logging
import org.apache.spark.SparkContext._
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.stat.{MultivariateOnlineSummarizer, MultivariateStatisticalSummary}
import org.apache.spark.rdd.RDD
@@ -44,22 +44,20 @@ import org.apache.spark.storage.StorageLevel
* be determined by the number of records in the RDD `rows`.
* @param nCols number of columns. A non-positive value means unknown, and then the number of
* columns will be determined by the size of the first row.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
class RowMatrix(
val rows: RDD[Vector],
private var nRows: Long,
private var nCols: Int) extends DistributedMatrix with Logging {
- /** Alternative constructor leaving matrix dimensions to be determined automatically.
- * @since 1.0.0
- * */
+ /** Alternative constructor leaving matrix dimensions to be determined automatically. */
+ @Since("1.0.0")
def this(rows: RDD[Vector]) = this(rows, 0L, 0)
- /** Gets or computes the number of columns.
- * @since 1.0.0
- * */
+ /** Gets or computes the number of columns. */
+ @Since("1.0.0")
override def numCols(): Long = {
if (nCols <= 0) {
try {
@@ -74,9 +72,8 @@ class RowMatrix(
nCols
}
- /** Gets or computes the number of rows.
- * @since 1.0.0
- * */
+ /** Gets or computes the number of rows. */
+ @Since("1.0.0")
override def numRows(): Long = {
if (nRows <= 0L) {
nRows = rows.count()
@@ -114,8 +111,8 @@ class RowMatrix(
/**
* Computes the Gramian matrix `A^T A`.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def computeGramianMatrix(): Matrix = {
val n = numCols().toInt
checkNumColumns(n)
@@ -185,8 +182,8 @@ class RowMatrix(
* @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0)
* are treated as zero, where sigma(0) is the largest singular value.
* @return SingularValueDecomposition(U, s, V). U = null if computeU = false.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def computeSVD(
k: Int,
computeU: Boolean = false,
@@ -326,8 +323,8 @@ class RowMatrix(
/**
* Computes the covariance matrix, treating each row as an observation.
* @return a local dense matrix of size n x n
- * @since 1.0.0
*/
+ @Since("1.0.0")
def computeCovariance(): Matrix = {
val n = numCols().toInt
checkNumColumns(n)
@@ -380,8 +377,8 @@ class RowMatrix(
*
* @param k number of top principal components.
* @return a matrix of size n-by-k, whose columns are principal components
- * @since 1.0.0
*/
+ @Since("1.0.0")
def computePrincipalComponents(k: Int): Matrix = {
val n = numCols().toInt
require(k > 0 && k <= n, s"k = $k out of range (0, n = $n]")
@@ -399,8 +396,8 @@ class RowMatrix(
/**
* Computes column-wise summary statistics.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def computeColumnSummaryStatistics(): MultivariateStatisticalSummary = {
val summary = rows.treeAggregate(new MultivariateOnlineSummarizer)(
(aggregator, data) => aggregator.add(data),
@@ -415,8 +412,8 @@ class RowMatrix(
* @param B a local matrix whose number of rows must match the number of columns of this matrix
* @return a [[org.apache.spark.mllib.linalg.distributed.RowMatrix]] representing the product,
* which preserves partitioning
- * @since 1.0.0
*/
+ @Since("1.0.0")
def multiply(B: Matrix): RowMatrix = {
val n = numCols().toInt
val k = B.numCols
@@ -448,8 +445,8 @@ class RowMatrix(
*
* @return An n x n sparse upper-triangular matrix of cosine similarities between
* columns of this matrix.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def columnSimilarities(): CoordinateMatrix = {
columnSimilarities(0.0)
}
@@ -492,8 +489,8 @@ class RowMatrix(
* with the cost vs estimate quality trade-off described above.
* @return An n x n sparse upper-triangular matrix of cosine similarities
* between columns of this matrix.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def columnSimilarities(threshold: Double): CoordinateMatrix = {
require(threshold >= 0, s"Threshold cannot be negative: $threshold")
@@ -671,9 +668,7 @@ class RowMatrix(
}
}
-/**
- * @since 1.0.0
- */
+@Since("1.0.0")
@Experimental
object RowMatrix {
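A short sketch of the `RowMatrix` statistics and decomposition methods annotated above, assuming an existing SparkContext `sc` and a toy 3 x 3 matrix:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix

val mat = new RowMatrix(sc.parallelize(Seq(
  Vectors.dense(1.0, 2.0, 3.0),
  Vectors.dense(4.0, 5.0, 6.0),
  Vectors.dense(7.0, 8.0, 9.0))))

val stats = mat.computeColumnSummaryStatistics()   // column-wise mean, variance, min, max, ...
val pc = mat.computePrincipalComponents(2)         // n x 2 local matrix of principal components
val sims = mat.columnSimilarities(0.1)             // approximate cosine similarities between columns
}}}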
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 56c549ef99..b27ef1b949 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -18,7 +18,7 @@
package org.apache.spark.mllib.recommendation
import org.apache.spark.Logging
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.ml.recommendation.{ALS => NewALS}
import org.apache.spark.rdd.RDD
@@ -26,8 +26,8 @@ import org.apache.spark.storage.StorageLevel
/**
* A more compact class to represent a rating than Tuple3[Int, Int, Double].
- * @since 0.8.0
*/
+@Since("0.8.0")
case class Rating(user: Int, product: Int, rating: Double)
/**
@@ -255,8 +255,8 @@ class ALS private (
/**
* Top-level methods for calling Alternating Least Squares (ALS) matrix factorization.
- * @since 0.8.0
*/
+@Since("0.8.0")
object ALS {
/**
* Train a matrix factorization model given an RDD of ratings given by users to some products,
@@ -271,8 +271,8 @@ object ALS {
* @param lambda regularization factor (recommended: 0.01)
* @param blocks level of parallelism to split computation into
* @param seed random seed
- * @since 0.9.1
*/
+ @Since("0.9.1")
def train(
ratings: RDD[Rating],
rank: Int,
@@ -296,8 +296,8 @@ object ALS {
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
* @param blocks level of parallelism to split computation into
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
ratings: RDD[Rating],
rank: Int,
@@ -319,8 +319,8 @@ object ALS {
* @param rank number of features to use
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double)
: MatrixFactorizationModel = {
train(ratings, rank, iterations, lambda, -1)
@@ -336,8 +336,8 @@ object ALS {
* @param ratings RDD of (userID, productID, rating) pairs
* @param rank number of features to use
* @param iterations number of iterations of ALS (recommended: 10-20)
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(ratings: RDD[Rating], rank: Int, iterations: Int)
: MatrixFactorizationModel = {
train(ratings, rank, iterations, 0.01, -1)
@@ -357,8 +357,8 @@ object ALS {
* @param blocks level of parallelism to split computation into
* @param alpha confidence parameter
* @param seed random seed
- * @since 0.8.1
*/
+ @Since("0.8.1")
def trainImplicit(
ratings: RDD[Rating],
rank: Int,
@@ -384,8 +384,8 @@ object ALS {
* @param lambda regularization factor (recommended: 0.01)
* @param blocks level of parallelism to split computation into
* @param alpha confidence parameter
- * @since 0.8.1
*/
+ @Since("0.8.1")
def trainImplicit(
ratings: RDD[Rating],
rank: Int,
@@ -409,8 +409,8 @@ object ALS {
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
* @param alpha confidence parameter
- * @since 0.8.1
*/
+ @Since("0.8.1")
def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double, alpha: Double)
: MatrixFactorizationModel = {
trainImplicit(ratings, rank, iterations, lambda, -1, alpha)
@@ -427,8 +427,8 @@ object ALS {
* @param ratings RDD of (userID, productID, rating) pairs
* @param rank number of features to use
* @param iterations number of iterations of ALS (recommended: 10-20)
- * @since 0.8.1
*/
+ @Since("0.8.1")
def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int)
: MatrixFactorizationModel = {
trainImplicit(ratings, rank, iterations, 0.01, -1, 1.0)
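The `ALS.train` and `ALS.trainImplicit` entry points annotated above differ only in how ratings are interpreted. A minimal sketch, assuming an existing SparkContext `sc`; the rank, iteration count, and regularization values are illustrative:
{{{
import org.apache.spark.mllib.recommendation.{ALS, Rating}

val ratings = sc.parallelize(Seq(
  Rating(1, 10, 4.0), Rating(1, 20, 1.0),
  Rating(2, 10, 5.0), Rating(2, 30, 2.0)))

// Explicit-feedback training: (ratings, rank, iterations, lambda).
val explicitModel = ALS.train(ratings, 8, 10, 0.01)
// Implicit-feedback training (e.g. view or click counts), with confidence parameter alpha.
val implicitModel = ALS.trainImplicit(ratings, 8, 10, 0.01, 1.0)
}}}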
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
index 261ca9cef0..ba4cfdcd9f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
@@ -30,6 +30,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.rdd.MLPairRDDFunctions._
@@ -49,8 +50,8 @@ import org.apache.spark.storage.StorageLevel
* the features computed for this user.
* @param productFeatures RDD of tuples where each tuple represents the productId
* and the features computed for this product.
- * @since 0.8.0
*/
+@Since("0.8.0")
class MatrixFactorizationModel(
val rank: Int,
val userFeatures: RDD[(Int, Array[Double])],
@@ -74,9 +75,8 @@ class MatrixFactorizationModel(
}
}
- /** Predict the rating of one user for one product.
- * @since 0.8.0
- */
+ /** Predict the rating of one user for one product. */
+ @Since("0.8.0")
def predict(user: Int, product: Int): Double = {
val userVector = userFeatures.lookup(user).head
val productVector = productFeatures.lookup(product).head
@@ -114,8 +114,8 @@ class MatrixFactorizationModel(
*
* @param usersProducts RDD of (user, product) pairs.
* @return RDD of Ratings.
- * @since 0.9.0
*/
+ @Since("0.9.0")
def predict(usersProducts: RDD[(Int, Int)]): RDD[Rating] = {
// Previously the partitions of ratings are only based on the given products.
// So if the usersProducts given for prediction contains only few products or
@@ -146,8 +146,8 @@ class MatrixFactorizationModel(
/**
* Java-friendly version of [[MatrixFactorizationModel.predict]].
- * @since 1.2.0
*/
+ @Since("1.2.0")
def predict(usersProducts: JavaPairRDD[JavaInteger, JavaInteger]): JavaRDD[Rating] = {
predict(usersProducts.rdd.asInstanceOf[RDD[(Int, Int)]]).toJavaRDD()
}
@@ -162,8 +162,8 @@ class MatrixFactorizationModel(
* by score, decreasing. The first returned is the one predicted to be most strongly
* recommended to the user. The score is an opaque value that indicates how strongly
* recommended the product is.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def recommendProducts(user: Int, num: Int): Array[Rating] =
MatrixFactorizationModel.recommend(userFeatures.lookup(user).head, productFeatures, num)
.map(t => Rating(user, t._1, t._2))
@@ -179,8 +179,8 @@ class MatrixFactorizationModel(
* by score, decreasing. The first returned is the one predicted to be most strongly
* recommended to the product. The score is an opaque value that indicates how strongly
* recommended the user is.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def recommendUsers(product: Int, num: Int): Array[Rating] =
MatrixFactorizationModel.recommend(productFeatures.lookup(product).head, userFeatures, num)
.map(t => Rating(t._1, product, t._2))
@@ -199,8 +199,8 @@ class MatrixFactorizationModel(
* @param sc Spark context used to save model data.
* @param path Path specifying the directory in which to save this model.
* If the directory already exists, this method throws an exception.
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
MatrixFactorizationModel.SaveLoadV1_0.save(this, path)
}
@@ -212,8 +212,8 @@ class MatrixFactorizationModel(
* @return [(Int, Array[Rating])] objects, where every tuple contains a userID and an array of
* rating objects which contain the same userId, the recommended productID and a "score" in the
* rating field. The semantics of the score are the same as in the recommendProducts API.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def recommendProductsForUsers(num: Int): RDD[(Int, Array[Rating])] = {
MatrixFactorizationModel.recommendForAll(rank, userFeatures, productFeatures, num).map {
case (user, top) =>
@@ -230,8 +230,8 @@ class MatrixFactorizationModel(
* @return [(Int, Array[Rating])] objects, where every tuple contains a productID and an array
* of rating objects which contain the recommended userId, the same productID and a "score" in the
* rating field. The semantics of the score are the same as in the recommendUsers API.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def recommendUsersForProducts(num: Int): RDD[(Int, Array[Rating])] = {
MatrixFactorizationModel.recommendForAll(rank, productFeatures, userFeatures, num).map {
case (product, top) =>
@@ -241,9 +241,7 @@ class MatrixFactorizationModel(
}
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] {
import org.apache.spark.mllib.util.Loader._
@@ -326,8 +324,8 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] {
* @param sc Spark context used for loading model files.
* @param path Path specifying the directory to which the model was saved.
* @return Model instance
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): MatrixFactorizationModel = {
val (loadedClassName, formatVersion, _) = loadMetadata(sc, path)
val classNameV1_0 = SaveLoadV1_0.thisClassName
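A sketch of the `MatrixFactorizationModel` prediction, recommendation, and persistence methods annotated above, assuming an existing SparkContext `sc`; the ratings and the save path are illustrative only:
{{{
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}

val ratings = sc.parallelize(Seq(
  Rating(1, 10, 4.0), Rating(1, 20, 1.0), Rating(2, 10, 5.0)))
val model = ALS.train(ratings, 8, 10, 0.01)

val score = model.predict(1, 20)          // predicted rating of product 20 by user 1
val top2 = model.recommendProducts(1, 2)  // top-2 recommendations for user 1
model.save(sc, "/tmp/mf-model")           // the path is illustrative only
val reloaded = MatrixFactorizationModel.load(sc, "/tmp/mf-model")
}}}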
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 2980b94de3..509f6a2d16 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.regression
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.feature.StandardScaler
import org.apache.spark.{Logging, SparkException}
import org.apache.spark.rdd.RDD
@@ -35,8 +35,8 @@ import org.apache.spark.storage.StorageLevel
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
@DeveloperApi
abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double)
extends Serializable {
@@ -56,8 +56,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
* @param testData RDD representing data points to be predicted
* @return RDD[Double] where each entry contains the corresponding prediction
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(testData: RDD[Vector]): RDD[Double] = {
// A small optimization to avoid serializing the entire model. Only the weightsMatrix
// and intercept is needed.
@@ -76,8 +76,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
* @param testData array representing a single data point
* @return Double prediction from the trained model
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(testData: Vector): Double = {
predictPoint(testData, weights, intercept)
}
@@ -95,8 +95,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
* GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM).
* This class should be extended with an Optimizer to create a new GLM.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
@DeveloperApi
abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
extends Logging with Serializable {
@@ -106,8 +106,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* The optimizer to solve the problem.
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def optimizer: Optimizer
/** Whether to add intercept (default: false). */
@@ -143,8 +143,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* The dimension of training features.
*
- * @since 1.4.0
*/
+ @Since("1.4.0")
def getNumFeatures: Int = this.numFeatures
/**
@@ -168,16 +168,16 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Get if the algorithm uses addIntercept
*
- * @since 1.4.0
*/
+ @Since("1.4.0")
def isAddIntercept: Boolean = this.addIntercept
/**
* Set if the algorithm should add an intercept. Default false.
* We set the default to false because adding the intercept will cause memory allocation.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def setIntercept(addIntercept: Boolean): this.type = {
this.addIntercept = addIntercept
this
@@ -186,8 +186,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Set if the algorithm should validate data before training. Default true.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def setValidateData(validateData: Boolean): this.type = {
this.validateData = validateData
this
@@ -197,8 +197,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
* Run the algorithm with the configured parameters on an input
* RDD of LabeledPoint entries.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def run(input: RDD[LabeledPoint]): M = {
if (numFeatures < 0) {
numFeatures = input.map(_.features.size).first()
@@ -231,8 +231,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
* Run the algorithm with the configured parameters on an input RDD
* of LabeledPoint entries starting from the initial weights provided.
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def run(input: RDD[LabeledPoint], initialWeights: Vector): M = {
if (numFeatures < 0) {
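The `GeneralizedLinearAlgorithm` members annotated above (`setIntercept`, `setValidateData`, the `optimizer` handle, `run`) are reached through a concrete subclass. A sketch using `LinearRegressionWithSGD` and its no-argument constructor, assuming an existing SparkContext `sc`; the data and optimizer settings are illustrative:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD}

val data = sc.parallelize(Seq(
  LabeledPoint(1.0, Vectors.dense(0.0, 1.1)),
  LabeledPoint(2.0, Vectors.dense(1.0, 2.1)),
  LabeledPoint(3.0, Vectors.dense(2.0, 3.2))))

val algo = new LinearRegressionWithSGD()
algo.setIntercept(true).setValidateData(true)         // GeneralizedLinearAlgorithm setters
algo.optimizer.setNumIterations(50).setStepSize(0.1)  // configure the underlying gradient descent
val model = algo.run(data)                            // GeneralizedLinearModel: weights + intercept
}}}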
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 8995591d9e..31ca7c2f20 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -29,7 +29,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.util.{Loader, Saveable}
@@ -47,8 +47,8 @@ import org.apache.spark.sql.SQLContext
* Results of isotonic regression and therefore monotone.
* @param isotonic indicates whether this is isotonic or antitonic.
*
- * @since 1.3.0
*/
+@Since("1.3.0")
@Experimental
class IsotonicRegressionModel (
val boundaries: Array[Double],
@@ -64,8 +64,8 @@ class IsotonicRegressionModel (
/**
* A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
*
- * @since 1.4.0
*/
+ @Since("1.4.0")
def this(boundaries: java.lang.Iterable[Double],
predictions: java.lang.Iterable[Double],
isotonic: java.lang.Boolean) = {
@@ -90,8 +90,8 @@ class IsotonicRegressionModel (
* @param testData Features to be labeled.
* @return Predicted labels.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def predict(testData: RDD[Double]): RDD[Double] = {
testData.map(predict)
}
@@ -103,8 +103,8 @@ class IsotonicRegressionModel (
* @param testData Features to be labeled.
* @return Predicted labels.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def predict(testData: JavaDoubleRDD): JavaDoubleRDD = {
JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]]))
}
@@ -125,8 +125,8 @@ class IsotonicRegressionModel (
* as piecewise linear function and interpolated value is returned. In case there are
* multiple values with the same boundary then the same rules as in 2) are used.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def predict(testData: Double): Double = {
def linearInterpolation(x1: Double, y1: Double, x2: Double, y2: Double, x: Double): Double = {
@@ -160,9 +160,7 @@ class IsotonicRegressionModel (
/** A convenient method for boundaries called by the Python API. */
private[mllib] def predictionVector: Vector = Vectors.dense(predictions)
- /**
- * @since 1.4.0
- */
+ @Since("1.4.0")
override def save(sc: SparkContext, path: String): Unit = {
IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
}
@@ -170,9 +168,7 @@ class IsotonicRegressionModel (
override protected def formatVersion: String = "1.0"
}
-/**
- * @since 1.4.0
- */
+@Since("1.4.0")
object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
import org.apache.spark.mllib.util.Loader._
@@ -219,8 +215,8 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
}
/**
- * @since 1.4.0
*/
+ @Since("1.4.0")
override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
implicit val formats = DefaultFormats
val (loadedClassName, version, metadata) = loadMetadata(sc, path)
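A sketch of fitting and using an `IsotonicRegressionModel` via the `IsotonicRegression` runner, assuming an existing SparkContext `sc`; the training tuples and save path are illustrative:
{{{
import org.apache.spark.mllib.regression.IsotonicRegression

// Input tuples are (label, feature, weight).
val input = sc.parallelize(Seq(
  (1.0, 1.0, 1.0), (2.0, 2.0, 1.0), (1.5, 3.0, 1.0), (3.0, 4.0, 1.0)))
val model = new IsotonicRegression().setIsotonic(true).run(input)

val single = model.predict(2.5)                               // interpolates between boundary predictions
val batch = model.predict(sc.parallelize(Seq(0.5, 2.5, 5.0)))
model.save(sc, "/tmp/isotonic-model")                         // path is illustrative only
}}}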
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
index 8b51011eeb..f7fe1b7b21 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.regression
import scala.beans.BeanInfo
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.{Vectors, Vector}
import org.apache.spark.mllib.util.NumericParser
import org.apache.spark.SparkException
@@ -29,8 +30,8 @@ import org.apache.spark.SparkException
* @param label Label for this data point.
* @param features List of features for this data point.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
@BeanInfo
case class LabeledPoint(label: Double, features: Vector) {
override def toString: String = {
@@ -41,15 +42,15 @@ case class LabeledPoint(label: Double, features: Vector) {
/**
* Parser for [[org.apache.spark.mllib.regression.LabeledPoint]].
*
- * @since 1.1.0
*/
+@Since("1.1.0")
object LabeledPoint {
/**
* Parses a string resulted from `LabeledPoint#toString` into
* an [[org.apache.spark.mllib.regression.LabeledPoint]].
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
def parse(s: String): LabeledPoint = {
if (s.startsWith("(")) {
NumericParser.parse(s) match {
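The `LabeledPoint.parse` method annotated above round-trips the case class's string form. A minimal sketch (no SparkContext needed):
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val p = LabeledPoint(1.0, Vectors.dense(0.5, -1.0, 3.0))
val s = p.toString                      // e.g. "(1.0,[0.5,-1.0,3.0])"
val roundTrip = LabeledPoint.parse(s)   // parses the string form back into a LabeledPoint
}}}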
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index 03eb589b05..556411a366 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -18,6 +18,7 @@
package org.apache.spark.mllib.regression
import org.apache.spark.SparkContext
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.optimization._
import org.apache.spark.mllib.pmml.PMMLExportable
@@ -31,8 +32,8 @@ import org.apache.spark.rdd.RDD
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
class LassoModel (
override val weights: Vector,
override val intercept: Double)
@@ -46,9 +47,7 @@ class LassoModel (
weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
}
@@ -56,14 +55,10 @@ class LassoModel (
override protected def formatVersion: String = "1.0"
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object LassoModel extends Loader[LassoModel] {
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): LassoModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -118,8 +113,8 @@ class LassoWithSGD private (
/**
* Top-level methods for calling Lasso.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
object LassoWithSGD {
/**
@@ -137,8 +132,8 @@ object LassoWithSGD {
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -162,8 +157,8 @@ object LassoWithSGD {
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -185,8 +180,8 @@ object LassoWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a LassoModel which has the weights and offset from training.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -205,8 +200,8 @@ object LassoWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a LassoModel which has the weights and offset from training.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int): LassoModel = {
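A sketch of the `LassoWithSGD.train` overloads annotated above, assuming an existing SparkContext `sc`; the step size and regularization values are illustrative:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, LassoWithSGD}

val training = sc.parallelize(Seq(
  LabeledPoint(1.0, Vectors.dense(1.0, 0.0)),
  LabeledPoint(2.0, Vectors.dense(2.0, 0.1)),
  LabeledPoint(3.0, Vectors.dense(3.0, 0.2))))

val model = LassoWithSGD.train(training, 100, 1.0, 0.1)  // numIterations, stepSize, regParam
println(model.weights)       // L1 regularization pushes uninformative weights toward zero
val yHat = model.predict(Vectors.dense(4.0, 0.3))
}}}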
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index fb5c220daa..00ab06e3ba 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -18,6 +18,7 @@
package org.apache.spark.mllib.regression
import org.apache.spark.SparkContext
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.optimization._
import org.apache.spark.mllib.pmml.PMMLExportable
@@ -31,8 +32,8 @@ import org.apache.spark.rdd.RDD
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
class LinearRegressionModel (
override val weights: Vector,
override val intercept: Double)
@@ -46,9 +47,7 @@ class LinearRegressionModel (
weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
}
@@ -56,14 +55,10 @@ class LinearRegressionModel (
override protected def formatVersion: String = "1.0"
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object LinearRegressionModel extends Loader[LinearRegressionModel] {
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): LinearRegressionModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -117,8 +112,8 @@ class LinearRegressionWithSGD private[mllib] (
/**
* Top-level methods for calling LinearRegression.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
object LinearRegressionWithSGD {
/**
@@ -135,8 +130,8 @@ object LinearRegressionWithSGD {
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -158,8 +153,8 @@ object LinearRegressionWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param miniBatchFraction Fraction of data to be used per iteration.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -179,8 +174,8 @@ object LinearRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a LinearRegressionModel which has the weights and offset from training.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -198,8 +193,8 @@ object LinearRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a LinearRegressionModel which has the weights and offset from training.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int): LinearRegressionModel = {
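A sketch of training, predicting with, and persisting a `LinearRegressionModel` through the methods annotated above, assuming an existing SparkContext `sc`; the save path is illustrative only:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionModel, LinearRegressionWithSGD}

val training = sc.parallelize(Seq(
  LabeledPoint(3.0, Vectors.dense(1.0, 1.0)),
  LabeledPoint(5.0, Vectors.dense(2.0, 1.0)),
  LabeledPoint(7.0, Vectors.dense(3.0, 1.0))))

val model = LinearRegressionWithSGD.train(training, 100, 0.1)  // numIterations, stepSize
val yHat = model.predict(Vectors.dense(4.0, 1.0))
model.save(sc, "/tmp/linreg-model")                            // path is illustrative only
val restored = LinearRegressionModel.load(sc, "/tmp/linreg-model")
}}}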
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index b097fd38fd..0e72d6591c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -19,14 +19,12 @@ package org.apache.spark.mllib.regression
import org.json4s.{DefaultFormats, JValue}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD
-/**
- * @since 0.8.0
- */
+@Since("0.8.0")
@Experimental
trait RegressionModel extends Serializable {
/**
@@ -35,8 +33,8 @@ trait RegressionModel extends Serializable {
* @param testData RDD representing data points to be predicted
* @return RDD[Double] where each entry contains the corresponding prediction
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(testData: RDD[Vector]): RDD[Double]
/**
@@ -45,8 +43,8 @@ trait RegressionModel extends Serializable {
* @param testData array representing a single data point
* @return Double prediction from the trained model
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(testData: Vector): Double
/**
@@ -54,8 +52,8 @@ trait RegressionModel extends Serializable {
* @param testData JavaRDD representing data points to be predicted
* @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
*
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 5bced6b4b7..21a791d98b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -18,6 +18,7 @@
package org.apache.spark.mllib.regression
import org.apache.spark.SparkContext
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.optimization._
import org.apache.spark.mllib.pmml.PMMLExportable
@@ -32,8 +33,8 @@ import org.apache.spark.rdd.RDD
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
class RidgeRegressionModel (
override val weights: Vector,
override val intercept: Double)
@@ -47,9 +48,7 @@ class RidgeRegressionModel (
weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
}
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
}
@@ -57,14 +56,10 @@ class RidgeRegressionModel (
override protected def formatVersion: String = "1.0"
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
- /**
- * @since 1.3.0
- */
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): RidgeRegressionModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -120,8 +115,8 @@ class RidgeRegressionWithSGD private (
/**
* Top-level methods for calling RidgeRegression.
*
- * @since 0.8.0
*/
+@Since("0.8.0")
object RidgeRegressionWithSGD {
/**
@@ -138,8 +133,8 @@ object RidgeRegressionWithSGD {
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -162,8 +157,8 @@ object RidgeRegressionWithSGD {
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -184,8 +179,8 @@ object RidgeRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a RidgeRegressionModel which has the weights and offset from training.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
@@ -203,8 +198,8 @@ object RidgeRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a RidgeRegressionModel which has the weights and offset from training.
*
- * @since 0.8.0
*/
+ @Since("0.8.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int): RidgeRegressionModel = {
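A sketch of the `RidgeRegressionWithSGD.train` overloads annotated above, assuming an existing SparkContext `sc`; the hyperparameters are illustrative:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, RidgeRegressionWithSGD}

val training = sc.parallelize(Seq(
  LabeledPoint(1.0, Vectors.dense(1.0, 2.0)),
  LabeledPoint(2.0, Vectors.dense(2.0, 4.1)),
  LabeledPoint(3.0, Vectors.dense(3.0, 6.2))))

val model = RidgeRegressionWithSGD.train(training, 100, 0.1, 0.01)  // numIterations, stepSize, regParam
println(model.weights)   // L2 regularization shrinks the weights without zeroing them out
}}}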
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
index a2ab95c474..cd3ed8a154 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.regression
import scala.reflect.ClassTag
import org.apache.spark.Logging
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream}
@@ -54,8 +54,8 @@ import org.apache.spark.streaming.dstream.DStream
* the model using each of the different sources, in sequence.
*
*
- * @since 1.1.0
*/
+@Since("1.1.0")
@DeveloperApi
abstract class StreamingLinearAlgorithm[
M <: GeneralizedLinearModel,
@@ -70,8 +70,8 @@ abstract class StreamingLinearAlgorithm[
/**
* Return the latest model.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
def latestModel(): M = {
model.get
}
@@ -84,8 +84,8 @@ abstract class StreamingLinearAlgorithm[
*
* @param data DStream containing labeled data
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def trainOn(data: DStream[LabeledPoint]): Unit = {
if (model.isEmpty) {
throw new IllegalArgumentException("Model must be initialized before starting training.")
@@ -106,8 +106,8 @@ abstract class StreamingLinearAlgorithm[
/**
* Java-friendly version of `trainOn`.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream)
/**
@@ -116,8 +116,8 @@ abstract class StreamingLinearAlgorithm[
* @param data DStream containing feature vectors
* @return DStream containing predictions
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
def predictOn(data: DStream[Vector]): DStream[Double] = {
if (model.isEmpty) {
throw new IllegalArgumentException("Model must be initialized before starting prediction.")
@@ -128,8 +128,8 @@ abstract class StreamingLinearAlgorithm[
/**
* Java-friendly version of `predictOn`.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = {
JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]])
}
@@ -140,8 +140,8 @@ abstract class StreamingLinearAlgorithm[
* @tparam K key type
* @return DStream containing the input keys and the predictions as values
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Double)] = {
if (model.isEmpty) {
throw new IllegalArgumentException("Model must be initialized before starting prediction")
@@ -153,8 +153,8 @@ abstract class StreamingLinearAlgorithm[
/**
* Java-friendly version of `predictOnValues`.
*
- * @since 1.3.0
*/
+ @Since("1.3.0")
def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = {
implicit val tag = fakeClassTag[K]
JavaPairDStream.fromPairDStream(
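`StreamingLinearAlgorithm` itself is abstract; the `trainOn`/`predictOn` methods annotated above are typically used through a concrete subclass such as `StreamingLinearRegressionWithSGD`. A sketch assuming an existing SparkContext `sc` and socket sources that emit `LabeledPoint.toString`-formatted lines; the hosts, ports, and feature dimension are illustrative only:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
import org.apache.spark.streaming.{Seconds, StreamingContext}

val ssc = new StreamingContext(sc, Seconds(1))
// Lines such as "(1.0,[2.0,3.0])" arriving on the sockets.
val trainingStream = ssc.socketTextStream("localhost", 9999).map(LabeledPoint.parse)
val testStream = ssc.socketTextStream("localhost", 9998).map(LabeledPoint.parse).map(_.features)

val algo = new StreamingLinearRegressionWithSGD()
  .setInitialWeights(Vectors.zeros(2))   // the model must be initialized before trainOn/predictOn
algo.trainOn(trainingStream)             // update the model on every batch
algo.predictOn(testStream).print()       // emit predictions for every batch
ssc.start()
}}}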
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
index 93a6753efd..4a856f7f34 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat
import com.github.fommil.netlib.BLAS.{getInstance => blas}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd.RDD
@@ -37,8 +37,8 @@ import org.apache.spark.rdd.RDD
* .setBandwidth(3.0)
* val densities = kd.estimate(Array(-1.0, 2.0, 5.0))
* }}}
- * @since 1.4.0
*/
+@Since("1.4.0")
@Experimental
class KernelDensity extends Serializable {
@@ -52,8 +52,8 @@ class KernelDensity extends Serializable {
/**
* Sets the bandwidth (standard deviation) of the Gaussian kernel (default: `1.0`).
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setBandwidth(bandwidth: Double): this.type = {
require(bandwidth > 0, s"Bandwidth must be positive, but got $bandwidth.")
this.bandwidth = bandwidth
@@ -62,8 +62,8 @@ class KernelDensity extends Serializable {
/**
* Sets the sample to use for density estimation.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setSample(sample: RDD[Double]): this.type = {
this.sample = sample
this
@@ -71,8 +71,8 @@ class KernelDensity extends Serializable {
/**
* Sets the sample to use for density estimation (for Java users).
- * @since 1.4.0
*/
+ @Since("1.4.0")
def setSample(sample: JavaRDD[java.lang.Double]): this.type = {
this.sample = sample.rdd.asInstanceOf[RDD[Double]]
this
@@ -80,8 +80,8 @@ class KernelDensity extends Serializable {
/**
* Estimates probability density function at the given array of points.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def estimate(points: Array[Double]): Array[Double] = {
val sample = this.sample
val bandwidth = this.bandwidth
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 64e4be0ebb..51b713e263 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.stat
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.{Vectors, Vector}
/**
@@ -33,8 +33,8 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector}
* Reference: [[http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance variance-wiki]]
* Zero elements (including explicit zero values) are skipped when calling add(),
* to have time complexity O(nnz) instead of O(n) for each column.
- * @since 1.1.0
*/
+@Since("1.1.0")
@DeveloperApi
class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with Serializable {
@@ -53,8 +53,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
*
* @param sample The sample in dense/sparse vector format to be added into this summarizer.
* @return This MultivariateOnlineSummarizer object.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def add(sample: Vector): this.type = {
if (n == 0) {
require(sample.size > 0, s"Vector should have dimension larger than zero.")
@@ -109,8 +109,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
*
* @param other The other MultivariateOnlineSummarizer to be merged.
* @return This MultivariateOnlineSummarizer object.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def merge(other: MultivariateOnlineSummarizer): this.type = {
if (this.totalCnt != 0 && other.totalCnt != 0) {
require(n == other.n, s"Dimensions mismatch when merging with another summarizer. " +
@@ -155,8 +155,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
/**
* Sample mean of each dimension.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
override def mean: Vector = {
require(totalCnt > 0, s"Nothing has been added to this summarizer.")
@@ -172,8 +172,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
/**
* Sample variance of each dimension.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
override def variance: Vector = {
require(totalCnt > 0, s"Nothing has been added to this summarizer.")
@@ -199,15 +199,15 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
/**
* Sample size.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
override def count: Long = totalCnt
/**
* Number of nonzero elements in each dimension.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
override def numNonzeros: Vector = {
require(totalCnt > 0, s"Nothing has been added to this summarizer.")
@@ -217,8 +217,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
/**
* Maximum value of each dimension.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
override def max: Vector = {
require(totalCnt > 0, s"Nothing has been added to this summarizer.")
@@ -233,8 +233,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
/**
* Minimum value of each dimension.
*
- * @since 1.1.0
*/
+ @Since("1.1.0")
override def min: Vector = {
require(totalCnt > 0, s"Nothing has been added to this summarizer.")
@@ -249,8 +249,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
/**
* L2 (Euclidean) norm of each dimension.
*
- * @since 1.2.0
*/
+ @Since("1.2.0")
override def normL2: Vector = {
require(totalCnt > 0, s"Nothing has been added to this summarizer.")
@@ -268,8 +268,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
/**
* L1 norm of each dimension.
*
- * @since 1.2.0
*/
+ @Since("1.2.0")
override def normL1: Vector = {
require(totalCnt > 0, s"Nothing has been added to this summarizer.")
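The add/merge pattern annotated above is how `MultivariateOnlineSummarizer` is usually driven over an RDD (the same pattern appears in `RowMatrix.computeColumnSummaryStatistics`). A minimal sketch, assuming an existing SparkContext `sc`:
{{{
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer

val data = sc.parallelize(Seq(Vectors.dense(1.0, 0.0), Vectors.dense(3.0, 4.0)))
// Fold vectors into per-partition summarizers with add(), then combine them with merge().
val summary = data.treeAggregate(new MultivariateOnlineSummarizer)(
  (agg, v) => agg.add(v),
  (a, b) => a.merge(b))

println(summary.mean)          // [2.0,2.0]
println(summary.numNonzeros)   // [2.0,1.0]
}}}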
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala
index 3bb49f1228..39a16fb743 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateStatisticalSummary.scala
@@ -17,59 +17,60 @@
package org.apache.spark.mllib.stat
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.Vector
/**
* Trait for multivariate statistical summary of a data matrix.
- * @since 1.0.0
*/
+@Since("1.0.0")
trait MultivariateStatisticalSummary {
/**
* Sample mean vector.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def mean: Vector
/**
* Sample variance vector. Should return a zero vector if the sample size is 1.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def variance: Vector
/**
* Sample size.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def count: Long
/**
* Number of nonzero elements (including explicitly presented zero values) in each column.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def numNonzeros: Vector
/**
* Maximum value of each column.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def max: Vector
/**
* Minimum value of each column.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def min: Vector
/**
* Euclidean magnitude of each column
- * @since 1.2.0
*/
+ @Since("1.2.0")
def normL2: Vector
/**
* L1 norm of each column
- * @since 1.2.0
*/
+ @Since("1.2.0")
def normL1: Vector
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
index ef8d786070..84d64a5bfb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat
import scala.annotation.varargs
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.{JavaRDD, JavaDoubleRDD}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.mllib.linalg.{Matrix, Vector}
@@ -32,8 +32,8 @@ import org.apache.spark.rdd.RDD
/**
* :: Experimental ::
* API for statistical functions in MLlib.
- * @since 1.1.0
*/
+@Since("1.1.0")
@Experimental
object Statistics {
@@ -42,8 +42,8 @@ object Statistics {
*
* @param X an RDD[Vector] for which column-wise summary statistics are to be computed.
* @return [[MultivariateStatisticalSummary]] object containing column-wise summary statistics.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def colStats(X: RDD[Vector]): MultivariateStatisticalSummary = {
new RowMatrix(X).computeColumnSummaryStatistics()
}
@@ -54,8 +54,8 @@ object Statistics {
*
* @param X an RDD[Vector] for which the correlation matrix is to be computed.
* @return Pearson correlation matrix comparing columns in X.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def corr(X: RDD[Vector]): Matrix = Correlations.corrMatrix(X)
/**
@@ -71,8 +71,8 @@ object Statistics {
* @param method String specifying the method to use for computing correlation.
* Supported: `pearson` (default), `spearman`
* @return Correlation matrix comparing columns in X.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def corr(X: RDD[Vector], method: String): Matrix = Correlations.corrMatrix(X, method)
/**
@@ -85,14 +85,14 @@ object Statistics {
* @param x RDD[Double] of the same cardinality as y.
* @param y RDD[Double] of the same cardinality as x.
* @return A Double containing the Pearson correlation between the two input RDD[Double]s
- * @since 1.1.0
*/
+ @Since("1.1.0")
def corr(x: RDD[Double], y: RDD[Double]): Double = Correlations.corr(x, y)
/**
* Java-friendly version of [[corr()]]
- * @since 1.4.1
*/
+ @Since("1.4.1")
def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double]): Double =
corr(x.rdd.asInstanceOf[RDD[Double]], y.rdd.asInstanceOf[RDD[Double]])
@@ -109,14 +109,14 @@ object Statistics {
* Supported: `pearson` (default), `spearman`
* @return A Double containing the correlation between the two input RDD[Double]s using the
* specified method.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def corr(x: RDD[Double], y: RDD[Double], method: String): Double = Correlations.corr(x, y, method)
/**
* Java-friendly version of [[corr()]]
- * @since 1.4.1
*/
+ @Since("1.4.1")
def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double], method: String): Double =
corr(x.rdd.asInstanceOf[RDD[Double]], y.rdd.asInstanceOf[RDD[Double]], method)
@@ -133,8 +133,8 @@ object Statistics {
* `expected` is rescaled if the `expected` sum differs from the `observed` sum.
* @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value,
* the method used, and the null hypothesis.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def chiSqTest(observed: Vector, expected: Vector): ChiSqTestResult = {
ChiSqTest.chiSquared(observed, expected)
}
@@ -148,8 +148,8 @@ object Statistics {
* @param observed Vector containing the observed categorical counts/relative frequencies.
* @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value,
* the method used, and the null hypothesis.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def chiSqTest(observed: Vector): ChiSqTestResult = ChiSqTest.chiSquared(observed)
/**
@@ -159,8 +159,8 @@ object Statistics {
* @param observed The contingency matrix (containing either counts or relative frequencies).
* @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value,
* the method used, and the null hypothesis.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def chiSqTest(observed: Matrix): ChiSqTestResult = ChiSqTest.chiSquaredMatrix(observed)
/**
@@ -172,13 +172,14 @@ object Statistics {
* Real-valued features will be treated as categorical for each distinct value.
* @return an array containing the ChiSquaredTestResult for every feature against the label.
* The order of the elements in the returned array reflects the order of input features.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def chiSqTest(data: RDD[LabeledPoint]): Array[ChiSqTestResult] = {
ChiSqTest.chiSquaredFeatures(data)
}
/** Java-friendly version of [[chiSqTest()]] */
+ @Since("1.5.0")
def chiSqTest(data: JavaRDD[LabeledPoint]): Array[ChiSqTestResult] = chiSqTest(data.rdd)
/**
@@ -194,6 +195,7 @@ object Statistics {
* @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test
* statistic, p-value, and null hypothesis.
*/
+ @Since("1.5.0")
def kolmogorovSmirnovTest(data: RDD[Double], cdf: Double => Double)
: KolmogorovSmirnovTestResult = {
KolmogorovSmirnovTest.testOneSample(data, cdf)
@@ -210,6 +212,7 @@ object Statistics {
* @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test
* statistic, p-value, and null hypothesis.
*/
+ @Since("1.5.0")
@varargs
def kolmogorovSmirnovTest(data: RDD[Double], distName: String, params: Double*)
: KolmogorovSmirnovTestResult = {
@@ -217,6 +220,7 @@ object Statistics {
}
/** Java-friendly version of [[kolmogorovSmirnovTest()]] */
+ @Since("1.5.0")
@varargs
def kolmogorovSmirnovTest(
data: JavaDoubleRDD,
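For reference, a minimal sketch of calling the Statistics methods annotated above; the SparkContext `sc` and the toy data are placeholders:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.stat.Statistics

    // Any RDD[Vector] works here; three rows are enough to illustrate the calls.
    val observations = sc.parallelize(Seq(
      Vectors.dense(1.0, 10.0, 100.0),
      Vectors.dense(2.0, 20.0, 200.0),
      Vectors.dense(3.0, 30.0, 300.0)))

    val summary  = Statistics.colStats(observations)           // column-wise mean, variance, count, ...
    val pearson  = Statistics.corr(observations)                // Pearson correlation matrix
    val spearman = Statistics.corr(observations, "spearman")    // rank-based alternative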
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
index 9aa7763d78..bd4d81390b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat.distribution
import breeze.linalg.{DenseVector => DBV, DenseMatrix => DBM, diag, max, eigSym, Vector => BV}
-import org.apache.spark.annotation.DeveloperApi;
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.{Vectors, Vector, Matrices, Matrix}
import org.apache.spark.mllib.util.MLUtils
@@ -32,8 +32,8 @@ import org.apache.spark.mllib.util.MLUtils
*
* @param mu The mean vector of the distribution
* @param sigma The covariance matrix of the distribution
- * @since 1.3.0
*/
+@Since("1.3.0")
@DeveloperApi
class MultivariateGaussian (
val mu: Vector,
@@ -62,15 +62,15 @@ class MultivariateGaussian (
private val (rootSigmaInv: DBM[Double], u: Double) = calculateCovarianceConstants
/** Returns density of this multivariate Gaussian at given point, x
- * @since 1.3.0
*/
+ @Since("1.3.0")
def pdf(x: Vector): Double = {
pdf(x.toBreeze)
}
/** Returns the log-density of this multivariate Gaussian at given point, x
- * @since 1.3.0
*/
+ @Since("1.3.0")
def logpdf(x: Vector): Double = {
logpdf(x.toBreeze)
}
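A small sketch of the MultivariateGaussian API annotated above, using an identity covariance purely for illustration:

    import org.apache.spark.mllib.linalg.{Matrices, Vectors}
    import org.apache.spark.mllib.stat.distribution.MultivariateGaussian

    val gaussian = new MultivariateGaussian(
      Vectors.dense(0.0, 0.0),                            // mean vector mu
      Matrices.dense(2, 2, Array(1.0, 0.0, 0.0, 1.0)))    // covariance matrix sigma
    val density    = gaussian.pdf(Vectors.dense(0.5, -0.5))
    val logDensity = gaussian.logpdf(Vectors.dense(0.5, -0.5))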
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
index e5200b86fd..972841015d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -22,7 +22,7 @@ import scala.collection.mutable
import scala.collection.mutable.ArrayBuilder
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.RandomForest.NodeIndexInfo
@@ -43,8 +43,8 @@ import org.apache.spark.util.random.XORShiftRandom
* @param strategy The configuration parameters for the tree algorithm which specify the type
* of algorithm (classification, regression, etc.), feature type (continuous,
* categorical), depth of the tree, quantile calculation strategy, etc.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
class DecisionTree (private val strategy: Strategy) extends Serializable with Logging {
@@ -54,8 +54,8 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo
* Method to train a decision tree model over an RDD
* @param input Training data: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]
* @return DecisionTreeModel that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def run(input: RDD[LabeledPoint]): DecisionTreeModel = {
// Note: random seed will not be used since numTrees = 1.
val rf = new RandomForest(strategy, numTrees = 1, featureSubsetStrategy = "all", seed = 0)
@@ -64,9 +64,7 @@ class DecisionTree (private val strategy: Strategy) extends Serializable with Lo
}
}
-/**
- * @since 1.0.0
- */
+@Since("1.0.0")
object DecisionTree extends Serializable with Logging {
/**
@@ -84,8 +82,8 @@ object DecisionTree extends Serializable with Logging {
* of algorithm (classification, regression, etc.), feature type (continuous,
* categorical), depth of the tree, quantile calculation strategy, etc.
* @return DecisionTreeModel that can be used for prediction
- * @since 1.0.0
- */
+ */
+ @Since("1.0.0")
def train(input: RDD[LabeledPoint], strategy: Strategy): DecisionTreeModel = {
new DecisionTree(strategy).run(input)
}
@@ -106,8 +104,8 @@ object DecisionTree extends Serializable with Logging {
* @param maxDepth Maximum depth of the tree.
* E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
* @return DecisionTreeModel that can be used for prediction
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
algo: Algo,
@@ -134,8 +132,8 @@ object DecisionTree extends Serializable with Logging {
* E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
* @param numClasses number of classes for classification. Default value of 2.
* @return DecisionTreeModel that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def train(
input: RDD[LabeledPoint],
algo: Algo,
@@ -168,8 +166,8 @@ object DecisionTree extends Serializable with Logging {
* E.g., an entry (n -> k) indicates that feature n is categorical
* with k categories indexed from 0: {0, 1, ..., k-1}.
* @return DecisionTreeModel that can be used for prediction
- * @since 1.0.0
*/
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
algo: Algo,
@@ -201,8 +199,8 @@ object DecisionTree extends Serializable with Logging {
* @param maxBins maximum number of bins used for splitting features
* (suggested value: 32)
* @return DecisionTreeModel that can be used for prediction
- * @since 1.1.0
*/
+ @Since("1.1.0")
def trainClassifier(
input: RDD[LabeledPoint],
numClasses: Int,
@@ -217,8 +215,8 @@ object DecisionTree extends Serializable with Logging {
/**
* Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
- * @since 1.1.0
*/
+ @Since("1.1.0")
def trainClassifier(
input: JavaRDD[LabeledPoint],
numClasses: Int,
@@ -247,8 +245,8 @@ object DecisionTree extends Serializable with Logging {
* @param maxBins maximum number of bins used for splitting features
* (suggested value: 32)
* @return DecisionTreeModel that can be used for prediction
- * @since 1.1.0
*/
+ @Since("1.1.0")
def trainRegressor(
input: RDD[LabeledPoint],
categoricalFeaturesInfo: Map[Int, Int],
@@ -261,8 +259,8 @@ object DecisionTree extends Serializable with Logging {
/**
* Java-friendly API for [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
- * @since 1.1.0
*/
+ @Since("1.1.0")
def trainRegressor(
input: JavaRDD[LabeledPoint],
categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer],
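A minimal sketch of the trainClassifier entry point annotated above; `sc` and the LIBSVM file path are placeholders:

    import org.apache.spark.mllib.tree.DecisionTree
    import org.apache.spark.mllib.util.MLUtils

    val data = MLUtils.loadLibSVMFile(sc, "data/sample_libsvm_data.txt")
    val model = DecisionTree.trainClassifier(
      data,
      numClasses = 2,
      categoricalFeaturesInfo = Map[Int, Int](),   // empty map => all features continuous
      impurity = "gini",
      maxDepth = 5,
      maxBins = 32)
    val firstPrediction = model.predict(data.first().features)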
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
index 1436170986..e750408600 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
@@ -18,7 +18,7 @@
package org.apache.spark.mllib.tree
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.impl.PeriodicRDDCheckpointer
import org.apache.spark.mllib.regression.LabeledPoint
@@ -48,8 +48,8 @@ import org.apache.spark.storage.StorageLevel
* for other loss functions.
*
* @param boostingStrategy Parameters for the gradient boosting algorithm.
- * @since 1.2.0
*/
+@Since("1.2.0")
@Experimental
class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
extends Serializable with Logging {
@@ -58,8 +58,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
* Method to train a gradient boosting model
* @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
* @return a gradient boosted trees model that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = {
val algo = boostingStrategy.treeStrategy.algo
algo match {
@@ -76,8 +76,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
/**
* Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#run]].
- * @since 1.2.0
*/
+ @Since("1.2.0")
def run(input: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = {
run(input.rdd)
}
@@ -91,8 +91,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
* E.g., these two datasets could be created from an original dataset
* by using [[org.apache.spark.rdd.RDD.randomSplit()]]
* @return a gradient boosted trees model that can be used for prediction
- * @since 1.4.0
*/
+ @Since("1.4.0")
def runWithValidation(
input: RDD[LabeledPoint],
validationInput: RDD[LabeledPoint]): GradientBoostedTreesModel = {
@@ -115,8 +115,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
/**
* Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees!#runWithValidation]].
- * @since 1.4.0
*/
+ @Since("1.4.0")
def runWithValidation(
input: JavaRDD[LabeledPoint],
validationInput: JavaRDD[LabeledPoint]): GradientBoostedTreesModel = {
@@ -124,9 +124,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
}
}
-/**
- * @since 1.2.0
- */
+@Since("1.2.0")
object GradientBoostedTrees extends Logging {
/**
@@ -137,8 +135,8 @@ object GradientBoostedTrees extends Logging {
* For regression, labels are real numbers.
* @param boostingStrategy Configuration options for the boosting algorithm.
* @return a gradient boosted trees model that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def train(
input: RDD[LabeledPoint],
boostingStrategy: BoostingStrategy): GradientBoostedTreesModel = {
@@ -147,8 +145,8 @@ object GradientBoostedTrees extends Logging {
/**
* Java-friendly API for [[org.apache.spark.mllib.tree.GradientBoostedTrees$#train]]
- * @since 1.2.0
*/
+ @Since("1.2.0")
def train(
input: JavaRDD[LabeledPoint],
boostingStrategy: BoostingStrategy): GradientBoostedTreesModel = {
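A minimal sketch of training through the run/train methods annotated above; `data` is an RDD[LabeledPoint] assumed to be prepared elsewhere:

    import org.apache.spark.mllib.tree.GradientBoostedTrees
    import org.apache.spark.mllib.tree.configuration.BoostingStrategy

    val boostingStrategy = BoostingStrategy.defaultParams("Regression")
    boostingStrategy.numIterations = 10           // the configuration fields are @BeanProperty vars
    boostingStrategy.treeStrategy.maxDepth = 3
    val gbtModel = GradientBoostedTrees.train(data, boostingStrategy)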
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
index 9f3230656a..63a902f3eb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable
import scala.collection.JavaConverters._
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.configuration.Strategy
@@ -260,9 +260,7 @@ private class RandomForest (
}
-/**
- * @since 1.2.0
- */
+@Since("1.2.0")
object RandomForest extends Serializable with Logging {
/**
@@ -279,8 +277,8 @@ object RandomForest extends Serializable with Logging {
* if numTrees > 1 (forest) set to "sqrt".
* @param seed Random seed for bootstrapping and choosing feature subsets.
* @return a random forest model that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def trainClassifier(
input: RDD[LabeledPoint],
strategy: Strategy,
@@ -317,8 +315,8 @@ object RandomForest extends Serializable with Logging {
* (suggested value: 100)
* @param seed Random seed for bootstrapping and choosing feature subsets.
* @return a random forest model that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def trainClassifier(
input: RDD[LabeledPoint],
numClasses: Int,
@@ -337,8 +335,8 @@ object RandomForest extends Serializable with Logging {
/**
* Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainClassifier]]
- * @since 1.2.0
*/
+ @Since("1.2.0")
def trainClassifier(
input: JavaRDD[LabeledPoint],
numClasses: Int,
@@ -368,8 +366,8 @@ object RandomForest extends Serializable with Logging {
* if numTrees > 1 (forest) set to "onethird".
* @param seed Random seed for bootstrapping and choosing feature subsets.
* @return a random forest model that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def trainRegressor(
input: RDD[LabeledPoint],
strategy: Strategy,
@@ -405,8 +403,8 @@ object RandomForest extends Serializable with Logging {
* (suggested value: 100)
* @param seed Random seed for bootstrapping and choosing feature subsets.
* @return a random forest model that can be used for prediction
- * @since 1.2.0
*/
+ @Since("1.2.0")
def trainRegressor(
input: RDD[LabeledPoint],
categoricalFeaturesInfo: Map[Int, Int],
@@ -424,8 +422,8 @@ object RandomForest extends Serializable with Logging {
/**
* Java-friendly API for [[org.apache.spark.mllib.tree.RandomForest$#trainRegressor]]
- * @since 1.2.0
*/
+ @Since("1.2.0")
def trainRegressor(
input: JavaRDD[LabeledPoint],
categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer],
@@ -442,8 +440,8 @@ object RandomForest extends Serializable with Logging {
/**
* List of supported feature subset sampling strategies.
- * @since 1.2.0
*/
+ @Since("1.2.0")
val supportedFeatureSubsetStrategies: Array[String] =
Array("auto", "all", "sqrt", "log2", "onethird")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala
index d9a49aa71f..8301ad1608 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala
@@ -17,13 +17,13 @@
package org.apache.spark.mllib.tree.configuration
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
/**
* :: Experimental ::
* Enum to select the algorithm for the decision tree
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
object Algo extends Enumeration {
type Algo = Value
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala
index 88e5f57e9a..7c56998197 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.tree.configuration
import scala.beans.BeanProperty
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.tree.configuration.Algo._
import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss}
@@ -38,8 +38,8 @@ import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss}
* validation input between two iterations is less than the validationTol
* then stop. Ignored when
* [[org.apache.spark.mllib.tree.GradientBoostedTrees.run()]] is used.
- * @since 1.2.0
*/
+@Since("1.2.0")
@Experimental
case class BoostingStrategy(
// Required boosting parameters
@@ -71,9 +71,7 @@ case class BoostingStrategy(
}
}
-/**
- * @since 1.2.0
- */
+@Since("1.2.0")
@Experimental
object BoostingStrategy {
@@ -81,8 +79,8 @@ object BoostingStrategy {
* Returns default configuration for the boosting algorithm
* @param algo Learning goal. Supported: "Classification" or "Regression"
* @return Configuration for boosting algorithm
- * @since 1.2.0
*/
+ @Since("1.2.0")
def defaultParams(algo: String): BoostingStrategy = {
defaultParams(Algo.fromString(algo))
}
@@ -93,8 +91,8 @@ object BoostingStrategy {
* [[org.apache.spark.mllib.tree.configuration.Algo.Classification]],
* [[org.apache.spark.mllib.tree.configuration.Algo.Regression]]
* @return Configuration for boosting algorithm
- * @since 1.3.0
*/
+ @Since("1.3.0")
def defaultParams(algo: Algo): BoostingStrategy = {
val treeStrategy = Strategy.defaultStrategy(algo)
treeStrategy.maxDepth = 3
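A short sketch of the Algo-typed defaultParams overload annotated above; the returned configuration is then tuned through its vars:

    import org.apache.spark.mllib.tree.configuration.{Algo, BoostingStrategy}
    import org.apache.spark.mllib.tree.loss.LogLoss

    val boosting = BoostingStrategy.defaultParams(Algo.Classification)
    boosting.loss = LogLoss          // log loss is also the classification default
    boosting.learningRate = 0.05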
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala
index 0684cafa48..bb7c7ee4f9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala
@@ -17,13 +17,13 @@
package org.apache.spark.mllib.tree.configuration
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
/**
* :: Experimental ::
* Enum to describe whether a feature is "continuous" or "categorical"
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
object FeatureType extends Enumeration {
type FeatureType = Value
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala
index 2daa63c4d2..904e42deeb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala
@@ -17,13 +17,13 @@
package org.apache.spark.mllib.tree.configuration
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
/**
* :: Experimental ::
* Enum for selecting the quantile calculation strategy
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
object QuantileStrategy extends Enumeration {
type QuantileStrategy = Value
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
index 7ae25a88bf..a58f01ba85 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.tree.configuration
import scala.beans.BeanProperty
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.tree.impurity.{Variance, Entropy, Gini, Impurity}
import org.apache.spark.mllib.tree.configuration.Algo._
import org.apache.spark.mllib.tree.configuration.QuantileStrategy._
@@ -66,8 +66,8 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._
* E.g. 10 means that the cache will get checkpointed every 10 updates. If
* the checkpoint directory is not set in
* [[org.apache.spark.SparkContext]], this setting is ignored.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
class Strategy (
@BeanProperty var algo: Algo,
@@ -85,23 +85,23 @@ class Strategy (
@BeanProperty var checkpointInterval: Int = 10) extends Serializable {
/**
- * @since 1.2.0
*/
+ @Since("1.2.0")
def isMulticlassClassification: Boolean = {
algo == Classification && numClasses > 2
}
/**
- * @since 1.2.0
*/
+ @Since("1.2.0")
def isMulticlassWithCategoricalFeatures: Boolean = {
isMulticlassClassification && (categoricalFeaturesInfo.size > 0)
}
/**
* Java-friendly constructor for [[org.apache.spark.mllib.tree.configuration.Strategy]]
- * @since 1.1.0
*/
+ @Since("1.1.0")
def this(
algo: Algo,
impurity: Impurity,
@@ -115,8 +115,8 @@ class Strategy (
/**
* Sets Algorithm using a String.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def setAlgo(algo: String): Unit = algo match {
case "Classification" => setAlgo(Classification)
case "Regression" => setAlgo(Regression)
@@ -124,8 +124,8 @@ class Strategy (
/**
* Sets categoricalFeaturesInfo using a Java Map.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def setCategoricalFeaturesInfo(
categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]): Unit = {
this.categoricalFeaturesInfo =
@@ -174,8 +174,8 @@ class Strategy (
/**
* Returns a shallow copy of this instance.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def copy: Strategy = {
new Strategy(algo, impurity, maxDepth, numClasses, maxBins,
quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain,
@@ -183,17 +183,15 @@ class Strategy (
}
}
-/**
- * @since 1.2.0
- */
+@Since("1.2.0")
@Experimental
object Strategy {
/**
* Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]]
* @param algo "Classification" or "Regression"
- * @since 1.2.0
*/
+ @Since("1.2.0")
def defaultStrategy(algo: String): Strategy = {
defaultStrategy(Algo.fromString(algo))
}
@@ -201,8 +199,8 @@ object Strategy {
/**
* Construct a default set of parameters for [[org.apache.spark.mllib.tree.DecisionTree]]
* @param algo Algo.Classification or Algo.Regression
- * @since 1.3.0
*/
+ @Since("1.3.0")
def defaultStrategy(algo: Algo): Strategy = algo match {
case Algo.Classification =>
new Strategy(algo = Classification, impurity = Gini, maxDepth = 10,
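A minimal sketch of building and adjusting a Strategy through the constructors and setters annotated above; the values are illustrative only:

    import org.apache.spark.mllib.tree.configuration.Strategy
    import org.apache.spark.mllib.tree.impurity.Entropy

    val strategy = Strategy.defaultStrategy("Classification")
    strategy.setImpurity(Entropy)     // @BeanProperty generates Java-style setters
    strategy.setMaxDepth(8)
    strategy.setCategoricalFeaturesInfo(
      new java.util.HashMap[java.lang.Integer, java.lang.Integer]())  // Java-friendly overload
    val copied = strategy.copy        // shallow copy, as documented above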
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
index 0b6c7266de..73df6b054a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
@@ -17,14 +17,14 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
/**
* :: Experimental ::
* Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during
* binary classification.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
object Entropy extends Impurity {
@@ -36,8 +36,8 @@ object Entropy extends Impurity {
* @param counts Array[Double] with counts for each label
* @param totalCount sum of counts for all labels
* @return information value, or 0 if totalCount = 0
- * @since 1.1.0
*/
+ @Since("1.1.0")
@DeveloperApi
override def calculate(counts: Array[Double], totalCount: Double): Double = {
if (totalCount == 0) {
@@ -64,8 +64,8 @@ object Entropy extends Impurity {
* @param sum sum of labels
* @param sumSquares summation of squares of the labels
* @return information value, or 0 if count = 0
- * @since 1.0.0
*/
+ @Since("1.0.0")
@DeveloperApi
override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
throw new UnsupportedOperationException("Entropy.calculate")
@@ -73,8 +73,8 @@ object Entropy extends Impurity {
/**
* Get this impurity instance.
* This is useful for passing impurity parameters to a Strategy in Java.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def instance: this.type = this
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
index 3b0be42883..f21845b21a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
@@ -17,15 +17,15 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
/**
* :: Experimental ::
* Class for calculating the
* [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]]
* during binary classification.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
object Gini extends Impurity {
@@ -35,8 +35,8 @@ object Gini extends Impurity {
* @param counts Array[Double] with counts for each label
* @param totalCount sum of counts for all labels
* @return information value, or 0 if totalCount = 0
- * @since 1.1.0
*/
+ @Since("1.1.0")
@DeveloperApi
override def calculate(counts: Array[Double], totalCount: Double): Double = {
if (totalCount == 0) {
@@ -60,8 +60,8 @@ object Gini extends Impurity {
* @param sum sum of labels
* @param sumSquares summation of squares of the labels
* @return information value, or 0 if count = 0
- * @since 1.0.0
*/
+ @Since("1.0.0")
@DeveloperApi
override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
throw new UnsupportedOperationException("Gini.calculate")
@@ -69,8 +69,8 @@ object Gini extends Impurity {
/**
* Get this impurity instance.
* This is useful for passing impurity parameters to a Strategy in Java.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def instance: this.type = this
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
index dd29740005..4637dcceea 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
/**
* :: Experimental ::
@@ -25,8 +25,8 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental}
* This trait is used for
* (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]]
* (b) calculating impurity values from sufficient statistics.
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
trait Impurity extends Serializable {
@@ -36,8 +36,8 @@ trait Impurity extends Serializable {
* @param counts Array[Double] with counts for each label
* @param totalCount sum of counts for all labels
* @return information value, or 0 if totalCount = 0
- * @since 1.1.0
*/
+ @Since("1.1.0")
@DeveloperApi
def calculate(counts: Array[Double], totalCount: Double): Double
@@ -48,8 +48,8 @@ trait Impurity extends Serializable {
* @param sum sum of labels
* @param sumSquares summation of squares of the labels
* @return information value, or 0 if count = 0
- * @since 1.0.0
*/
+ @Since("1.0.0")
@DeveloperApi
def calculate(count: Double, sum: Double, sumSquares: Double): Double
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
index adbe05811f..a74197278d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
@@ -17,13 +17,13 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
/**
* :: Experimental ::
* Class for calculating variance during regression
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
object Variance extends Impurity {
@@ -33,8 +33,8 @@ object Variance extends Impurity {
* @param counts Array[Double] with counts for each label
* @param totalCount sum of counts for all labels
* @return information value, or 0 if totalCount = 0
- * @since 1.1.0
*/
+ @Since("1.1.0")
@DeveloperApi
override def calculate(counts: Array[Double], totalCount: Double): Double =
throw new UnsupportedOperationException("Variance.calculate")
@@ -46,8 +46,8 @@ object Variance extends Impurity {
* @param sum sum of labels
* @param sumSquares summation of squares of the labels
* @return information value, or 0 if count = 0
- * @since 1.0.0
*/
+ @Since("1.0.0")
@DeveloperApi
override def calculate(count: Double, sum: Double, sumSquares: Double): Double = {
if (count == 0) {
@@ -60,8 +60,8 @@ object Variance extends Impurity {
/**
* Get this impurity instance.
* This is useful for passing impurity parameters to a Strategy in Java.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def instance: this.type = this
}
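For reference, the impurity objects above can also be evaluated directly from sufficient statistics; the numbers below are illustrative:

    import org.apache.spark.mllib.tree.impurity.{Entropy, Gini, Variance}

    val counts = Array(40.0, 60.0)                         // label counts at a binary node
    val gini    = Gini.calculate(counts, counts.sum)       // 1 - (0.4^2 + 0.6^2) = 0.48
    val entropy = Entropy.calculate(counts, counts.sum)    // -(0.4 log2 0.4 + 0.6 log2 0.6)
    // Regression impurity is computed from count, sum and sum of squares:
    val variance = Variance.calculate(10.0, 25.0, 70.0)    // 70/10 - (25/10)^2 = 0.75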
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala
index c6e3d0d824..bab7b8c6ca 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.loss
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.model.TreeEnsembleModel
@@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel
* The absolute (L1) error is defined as:
* |y - F(x)|
* where y is the label and F(x) is the model prediction for features x.
- * @since 1.2.0
*/
+@Since("1.2.0")
@DeveloperApi
object AbsoluteError extends Loss {
@@ -41,8 +41,8 @@ object AbsoluteError extends Loss {
* @param prediction Predicted label.
* @param label True label.
* @return Loss gradient
- * @since 1.2.0
*/
+ @Since("1.2.0")
override def gradient(prediction: Double, label: Double): Double = {
if (label - prediction < 0) 1.0 else -1.0
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala
index eee58445a1..b2b4594712 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.loss
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.model.TreeEnsembleModel
import org.apache.spark.mllib.util.MLUtils
@@ -31,8 +31,8 @@ import org.apache.spark.mllib.util.MLUtils
* The log loss is defined as:
* 2 log(1 + exp(-2 y F(x)))
* where y is a label in {-1, 1} and F(x) is the model prediction for features x.
- * @since 1.2.0
*/
+@Since("1.2.0")
@DeveloperApi
object LogLoss extends Loss {
@@ -43,8 +43,8 @@ object LogLoss extends Loss {
* @param prediction Predicted label.
* @param label True label.
* @return Loss gradient
- * @since 1.2.0
*/
+ @Since("1.2.0")
override def gradient(prediction: Double, label: Double): Double = {
- 4.0 * label / (1.0 + math.exp(2.0 * label * prediction))
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
index 7c9fb92464..687cde325f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.loss
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.model.TreeEnsembleModel
import org.apache.spark.rdd.RDD
@@ -26,8 +26,8 @@ import org.apache.spark.rdd.RDD
/**
* :: DeveloperApi ::
* Trait for adding "pluggable" loss functions for the gradient boosting algorithm.
- * @since 1.2.0
*/
+@Since("1.2.0")
@DeveloperApi
trait Loss extends Serializable {
@@ -36,8 +36,8 @@ trait Loss extends Serializable {
* @param prediction Predicted feature
* @param label true label.
* @return Loss gradient.
- * @since 1.2.0
*/
+ @Since("1.2.0")
def gradient(prediction: Double, label: Double): Double
/**
@@ -47,8 +47,8 @@ trait Loss extends Serializable {
* @param model Model of the weak learner.
* @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
* @return Measure of model error on data
- * @since 1.2.0
*/
+ @Since("1.2.0")
def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = {
data.map(point => computeError(model.predict(point.features), point.label)).mean()
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala
index 47dc94cde7..2b112fbe12 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Losses.scala
@@ -17,14 +17,12 @@
package org.apache.spark.mllib.tree.loss
-/**
- * @since 1.2.0
- */
+import org.apache.spark.annotation.Since
+
+@Since("1.2.0")
object Losses {
- /**
- * @since 1.2.0
- */
+ @Since("1.2.0")
def fromString(name: String): Loss = name match {
case "leastSquaresError" => SquaredError
case "leastAbsoluteError" => AbsoluteError
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala
index ff8903d695..3f7d3d38be 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.loss
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.model.TreeEnsembleModel
@@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.model.TreeEnsembleModel
* The squared (L2) error is defined as:
* (y - F(x))**2
* where y is the label and F(x) is the model prediction for features x.
- * @since 1.2.0
*/
+@Since("1.2.0")
@DeveloperApi
object SquaredError extends Loss {
@@ -41,8 +41,8 @@ object SquaredError extends Loss {
* @param prediction Predicted label.
* @param label True label.
* @return Loss gradient
- * @since 1.2.0
*/
+ @Since("1.2.0")
override def gradient(prediction: Double, label: Double): Double = {
- 2.0 * (label - prediction)
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index 0f386a2660..3eefd135f7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -24,7 +24,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.{Logging, SparkContext}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.tree.configuration.{Algo, FeatureType}
@@ -40,8 +40,8 @@ import org.apache.spark.util.Utils
* This model stores the decision tree structure and parameters.
* @param topNode root node
* @param algo algorithm type -- classification or regression
- * @since 1.0.0
*/
+@Since("1.0.0")
@Experimental
class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable with Saveable {
@@ -50,8 +50,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
*
* @param features array representing a single data point
* @return Double prediction from the trained model
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(features: Vector): Double = {
topNode.predict(features)
}
@@ -61,8 +61,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
*
* @param features RDD representing data points to be predicted
* @return RDD of predictions for each of the given data points
- * @since 1.0.0
*/
+ @Since("1.0.0")
def predict(features: RDD[Vector]): RDD[Double] = {
features.map(x => predict(x))
}
@@ -72,16 +72,16 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
*
* @param features JavaRDD representing data points to be predicted
* @return JavaRDD of predictions for each of the given data points
- * @since 1.2.0
*/
+ @Since("1.2.0")
def predict(features: JavaRDD[Vector]): JavaRDD[Double] = {
predict(features.rdd)
}
/**
* Get number of nodes in tree, including leaf nodes.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def numNodes: Int = {
1 + topNode.numDescendants
}
@@ -89,8 +89,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
/**
* Get depth of tree.
* E.g.: Depth 0 means 1 leaf node. Depth 1 means 1 internal node and 2 leaf nodes.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def depth: Int = {
topNode.subtreeDepth
}
@@ -119,8 +119,8 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
* @param sc Spark context used to save model data.
* @param path Path specifying the directory in which to save this model.
* If the directory already exists, this method throws an exception.
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
DecisionTreeModel.SaveLoadV1_0.save(sc, path, this)
}
@@ -128,9 +128,7 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
override protected def formatVersion: String = DecisionTreeModel.formatVersion
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
private[spark] def formatVersion: String = "1.0"
@@ -317,8 +315,8 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
* @param sc Spark context used for loading model files.
* @param path Path specifying the directory to which the model was saved.
* @return Model instance
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): DecisionTreeModel = {
implicit val formats = DefaultFormats
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
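A minimal sketch of the save/load round trip annotated above; `sc`, `model` and the path are placeholders:

    import org.apache.spark.mllib.tree.model.DecisionTreeModel

    model.save(sc, "target/tmp/myDecisionTreeModel")
    val sameModel = DecisionTreeModel.load(sc, "target/tmp/myDecisionTreeModel")
    println(s"nodes = ${sameModel.numNodes}, depth = ${sameModel.depth}")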
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
index 23f0363639..091a0462c2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.model
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.tree.impurity.ImpurityCalculator
/**
@@ -29,8 +29,8 @@ import org.apache.spark.mllib.tree.impurity.ImpurityCalculator
* @param rightImpurity right node impurity
* @param leftPredict left node predict
* @param rightPredict right node predict
- * @since 1.0.0
*/
+@Since("1.0.0")
@DeveloperApi
class InformationGainStats(
val gain: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
index aca3350c2e..8c54c55107 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.model
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.Logging
import org.apache.spark.mllib.tree.configuration.FeatureType._
import org.apache.spark.mllib.linalg.Vector
@@ -38,8 +38,8 @@ import org.apache.spark.mllib.linalg.Vector
* @param leftNode left child
* @param rightNode right child
* @param stats information gain stats
- * @since 1.0.0
*/
+@Since("1.0.0")
@DeveloperApi
class Node (
val id: Int,
@@ -59,8 +59,8 @@ class Node (
/**
* build the left node and right nodes if not leaf
* @param nodes array of nodes
- * @since 1.0.0
*/
+ @Since("1.0.0")
@deprecated("build should no longer be used since trees are constructed on-the-fly in training",
"1.2.0")
def build(nodes: Array[Node]): Unit = {
@@ -81,8 +81,8 @@ class Node (
* predict value if node is not leaf
* @param features feature value
* @return predicted value
- * @since 1.1.0
*/
+ @Since("1.1.0")
def predict(features: Vector) : Double = {
if (isLeaf) {
predict.predict
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
index be819b59e7..965784051e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
@@ -17,14 +17,14 @@
package org.apache.spark.mllib.tree.model
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
/**
* Predicted value for a node
* @param predict predicted value
* @param prob probability of the label (classification only)
- * @since 1.2.0
*/
+@Since("1.2.0")
@DeveloperApi
class Predict(
val predict: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
index 18d40530ae..45db83ae3a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.tree.model
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
import org.apache.spark.mllib.tree.configuration.FeatureType
import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
@@ -30,8 +30,8 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
* Split left if feature <= threshold, else right.
* @param featureType type of feature -- categorical or continuous
* @param categories Split left if categorical feature value is in this set, else right.
- * @since 1.0.0
*/
+@Since("1.0.0")
@DeveloperApi
case class Split(
feature: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
index 0c629b12a8..19571447a2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.{Logging, SparkContext}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -45,8 +45,8 @@ import org.apache.spark.util.Utils
*
* @param algo algorithm for the ensemble model, either Classification or Regression
* @param trees tree ensembles
- * @since 1.2.0
*/
+@Since("1.2.0")
@Experimental
class RandomForestModel(override val algo: Algo, override val trees: Array[DecisionTreeModel])
extends TreeEnsembleModel(algo, trees, Array.fill(trees.length)(1.0),
@@ -60,8 +60,8 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis
* @param sc Spark context used to save model data.
* @param path Path specifying the directory in which to save this model.
* If the directory already exists, this method throws an exception.
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this,
RandomForestModel.SaveLoadV1_0.thisClassName)
@@ -70,9 +70,7 @@ class RandomForestModel(override val algo: Algo, override val trees: Array[Decis
override protected def formatVersion: String = RandomForestModel.formatVersion
}
-/**
- * @since 1.3.0
- */
+@Since("1.3.0")
object RandomForestModel extends Loader[RandomForestModel] {
private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion
@@ -82,8 +80,8 @@ object RandomForestModel extends Loader[RandomForestModel] {
* @param sc Spark context used for loading model files.
* @param path Path specifying the directory to which the model was saved.
* @return Model instance
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): RandomForestModel = {
val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path)
val classNameV1_0 = SaveLoadV1_0.thisClassName
@@ -114,8 +112,8 @@ object RandomForestModel extends Loader[RandomForestModel] {
* @param algo algorithm for the ensemble model, either Classification or Regression
* @param trees tree ensembles
* @param treeWeights tree ensemble weights
- * @since 1.2.0
*/
+@Since("1.2.0")
@Experimental
class GradientBoostedTreesModel(
override val algo: Algo,
@@ -130,8 +128,8 @@ class GradientBoostedTreesModel(
* @param sc Spark context used to save model data.
* @param path Path specifying the directory in which to save this model.
* If the directory already exists, this method throws an exception.
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def save(sc: SparkContext, path: String): Unit = {
TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this,
GradientBoostedTreesModel.SaveLoadV1_0.thisClassName)
@@ -143,8 +141,8 @@ class GradientBoostedTreesModel(
* @param loss evaluation metric.
* @return an array with index i having the losses or errors for the ensemble
* containing the first i+1 trees
- * @since 1.4.0
*/
+ @Since("1.4.0")
def evaluateEachIteration(
data: RDD[LabeledPoint],
loss: Loss): Array[Double] = {
@@ -186,8 +184,8 @@ class GradientBoostedTreesModel(
}
/**
- * @since 1.3.0
*/
+@Since("1.3.0")
object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
/**
@@ -199,8 +197,8 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
* @param loss: evaluation metric.
* @return a RDD with each element being a zip of the prediction and error
* corresponding to every sample.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def computeInitialPredictionAndError(
data: RDD[LabeledPoint],
initTreeWeight: Double,
@@ -223,8 +221,8 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
* @param loss: evaluation metric.
* @return a RDD with each element being a zip of the prediction and error
* corresponding to each sample.
- * @since 1.4.0
*/
+ @Since("1.4.0")
def updatePredictionError(
data: RDD[LabeledPoint],
predictionAndError: RDD[(Double, Double)],
@@ -248,8 +246,8 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
* @param sc Spark context used for loading model files.
* @param path Path specifying the directory to which the model was saved.
* @return Model instance
- * @since 1.3.0
*/
+ @Since("1.3.0")
override def load(sc: SparkContext, path: String): GradientBoostedTreesModel = {
val (loadedClassName, version, jsonMetadata) = Loader.loadMetadata(sc, path)
val classNameV1_0 = SaveLoadV1_0.thisClassName
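A minimal sketch of the ensemble-model methods annotated above; `sc`, `gbtModel`, `data` and the path are placeholders:

    import org.apache.spark.mllib.tree.loss.SquaredError
    import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel

    gbtModel.save(sc, "target/tmp/myGBTModel")
    val restored = GradientBoostedTreesModel.load(sc, "target/tmp/myGBTModel")
    val errorPerIteration = restored.evaluateEachIteration(data, SquaredError)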
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala
index f520b3a1b7..bcaacc1b1f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/package.scala
@@ -24,7 +24,6 @@ package org.apache.spark.mllib
* - information loss calculation with entropy and Gini for classification and
* variance for regression,
* - both continuous and categorical features.
- * @since 1.0.0
*/
package object tree {
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 11ed23176f..4940974bf4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -21,7 +21,7 @@ import scala.reflect.ClassTag
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.PartitionwiseSampledRDD
@@ -64,8 +64,8 @@ object MLUtils {
* feature dimensions.
* @param minPartitions min number of partitions
* @return labeled data stored as an RDD[LabeledPoint]
- * @since 1.0.0
*/
+ @Since("1.0.0")
def loadLibSVMFile(
sc: SparkContext,
path: String,
@@ -115,9 +115,7 @@ object MLUtils {
// Convenient methods for `loadLibSVMFile`.
- /**
- * @since 1.0.0
- */
+ @Since("1.0.0")
@deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
def loadLibSVMFile(
sc: SparkContext,
@@ -130,17 +128,15 @@ object MLUtils {
/**
* Loads labeled data in the LIBSVM format into an RDD[LabeledPoint], with the default number of
* partitions.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def loadLibSVMFile(
sc: SparkContext,
path: String,
numFeatures: Int): RDD[LabeledPoint] =
loadLibSVMFile(sc, path, numFeatures, sc.defaultMinPartitions)
- /**
- * @since 1.0.0
- */
+ @Since("1.0.0")
@deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
def loadLibSVMFile(
sc: SparkContext,
@@ -149,9 +145,7 @@ object MLUtils {
numFeatures: Int): RDD[LabeledPoint] =
loadLibSVMFile(sc, path, numFeatures)
- /**
- * @since 1.0.0
- */
+ @Since("1.0.0")
@deprecated("use method without multiclass argument, which no longer has effect", "1.1.0")
def loadLibSVMFile(
sc: SparkContext,
@@ -162,8 +156,8 @@ object MLUtils {
/**
* Loads binary labeled data in the LIBSVM format into an RDD[LabeledPoint], with number of
* features determined automatically and the default number of partitions.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def loadLibSVMFile(sc: SparkContext, path: String): RDD[LabeledPoint] =
loadLibSVMFile(sc, path, -1)
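A minimal sketch of the loadLibSVMFile overloads annotated above; `sc` and the path are placeholders, and 692 stands in for a known feature count:

    import org.apache.spark.mllib.util.MLUtils

    val inferred = MLUtils.loadLibSVMFile(sc, "data/sample_libsvm_data.txt")        // numFeatures inferred
    val fixed    = MLUtils.loadLibSVMFile(sc, "data/sample_libsvm_data.txt", 692)   // explicit numFeatures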
@@ -193,15 +187,15 @@ object MLUtils {
* @param path file or directory path in any Hadoop-supported file system URI
* @param minPartitions min number of partitions
* @return vectors stored as an RDD[Vector]
- * @since 1.1.0
*/
+ @Since("1.1.0")
def loadVectors(sc: SparkContext, path: String, minPartitions: Int): RDD[Vector] =
sc.textFile(path, minPartitions).map(Vectors.parse)
/**
* Loads vectors saved using `RDD[Vector].saveAsTextFile` with the default number of partitions.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def loadVectors(sc: SparkContext, path: String): RDD[Vector] =
sc.textFile(path, sc.defaultMinPartitions).map(Vectors.parse)
@@ -211,16 +205,16 @@ object MLUtils {
* @param path file or directory path in any Hadoop-supported file system URI
* @param minPartitions min number of partitions
* @return labeled points stored as an RDD[LabeledPoint]
- * @since 1.1.0
*/
+ @Since("1.1.0")
def loadLabeledPoints(sc: SparkContext, path: String, minPartitions: Int): RDD[LabeledPoint] =
sc.textFile(path, minPartitions).map(LabeledPoint.parse)
/**
* Loads labeled points saved using `RDD[LabeledPoint].saveAsTextFile` with the default number of
* partitions.
- * @since 1.1.0
*/
+ @Since("1.1.0")
def loadLabeledPoints(sc: SparkContext, dir: String): RDD[LabeledPoint] =
loadLabeledPoints(sc, dir, sc.defaultMinPartitions)
@@ -236,8 +230,8 @@ object MLUtils {
*
* @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and
* [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading.
- * @since 1.0.0
*/
+ @Since("1.0.0")
@deprecated("Should use MLUtils.loadLabeledPoints instead.", "1.0.1")
def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = {
sc.textFile(dir).map { line =>
@@ -258,8 +252,8 @@ object MLUtils {
*
* @deprecated Should use [[org.apache.spark.rdd.RDD#saveAsTextFile]] for saving and
* [[org.apache.spark.mllib.util.MLUtils#loadLabeledPoints]] for loading.
- * @since 1.0.0
*/
+ @Since("1.0.0")
@deprecated("Should use RDD[LabeledPoint].saveAsTextFile instead.", "1.0.1")
def saveLabeledData(data: RDD[LabeledPoint], dir: String) {
val dataStr = data.map(x => x.label + "," + x.features.toArray.mkString(" "))
@@ -271,8 +265,8 @@ object MLUtils {
* Return a k element array of pairs of RDDs with the first element of each pair
* containing the training data, a complement of the validation data and the second
* element, the validation data, containing a unique 1/kth of the data. Where k=numFolds.
- * @since 1.0.0
*/
+ @Since("1.0.0")
@Experimental
def kFold[T: ClassTag](rdd: RDD[T], numFolds: Int, seed: Int): Array[(RDD[T], RDD[T])] = {
val numFoldsF = numFolds.toFloat
@@ -287,8 +281,8 @@ object MLUtils {
/**
* Returns a new vector with `1.0` (bias) appended to the input vector.
- * @since 1.0.0
*/
+ @Since("1.0.0")
def appendBias(vector: Vector): Vector = {
vector match {
case dv: DenseVector =>