45 files changed, 43 insertions, 208 deletions
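Every file in this change follows the same mechanical pattern: the ":: Experimental ::" tag is dropped from the Scaladoc and the @Experimental annotation is removed from the declaration, while the @Since version annotation is kept. A minimal before/after sketch of that pattern, using the HashingTF declaration touched below:

    // Before this change: the API is marked experimental.
    /**
     * :: Experimental ::
     * Maps a sequence of terms to their term frequencies using the hashing trick.
     */
    @Since("1.1.0")
    @Experimental
    class HashingTF(val numFeatures: Int) extends Serializable

    // After this change: only the version marker remains.
    /**
     * Maps a sequence of terms to their term frequencies using the hashing trick.
     */
    @Since("1.1.0")
    class HashingTF(val numFeatures: Int) extends Serializable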
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
index 85a413243b..5161bc7265 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -19,17 +19,15 @@ package org.apache.spark.mllib.classification

import org.json4s.{DefaultFormats, JValue}

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD

/**
- * :: Experimental ::
 * Represents a classification model that predicts to which of a set of categories an example
 * belongs. The categories are represented by double values: 0.0, 1.0, 2.0, etc.
 */
-@Experimental
@Since("0.8.0")
trait ClassificationModel extends Serializable {
  /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 5ceff5b225..2d52abc122 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -18,7 +18,7 @@ package org.apache.spark.mllib.classification

import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.classification.impl.GLMClassificationModel
import org.apache.spark.mllib.linalg.BLAS.dot
import org.apache.spark.mllib.linalg.{DenseVector, Vector}
@@ -82,35 +82,29 @@ class LogisticRegressionModel @Since("1.3.0") (
  private var threshold: Option[Double] = Some(0.5)

  /**
-   * :: Experimental ::
   * Sets the threshold that separates positive predictions from negative predictions
   * in Binary Logistic Regression. An example with prediction score greater than or equal to
   * this threshold is identified as positive, and negative otherwise. The default value is 0.5.
   * It is only used for binary classification.
   */
  @Since("1.0.0")
-  @Experimental
  def setThreshold(threshold: Double): this.type = {
    this.threshold = Some(threshold)
    this
  }

  /**
-   * :: Experimental ::
   * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
   * It is only used for binary classification.
   */
  @Since("1.3.0")
-  @Experimental
  def getThreshold: Option[Double] = threshold

  /**
-   * :: Experimental ::
   * Clears the threshold so that `predict` will output raw prediction scores.
   * It is only used for binary classification.
   */
  @Since("1.0.0")
-  @Experimental
  def clearThreshold(): this.type = {
    threshold = None
    this
@@ -359,13 +353,11 @@ class LogisticRegressionWithLBFGS
  }

  /**
-   * :: Experimental ::
   * Sets the number of possible outcomes for a k-class classification problem in
   * Multinomial Logistic Regression.
   * By default, it is binary logistic regression so k will be set to 2.
   */
  @Since("1.3.0")
-  @Experimental
  def setNumClasses(numClasses: Int): this.type = {
    require(numClasses > 1)
    numOfLinearPredictor = numClasses - 1
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 896565cd90..a8d3fd4177 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -18,7 +18,7 @@ package org.apache.spark.mllib.classification

import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.classification.impl.GLMClassificationModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.optimization._
@@ -43,32 +43,26 @@ class SVMModel @Since("1.1.0") (
  private var threshold: Option[Double] = Some(0.0)

  /**
-   * :: Experimental ::
   * Sets the threshold that separates positive predictions from negative predictions. An example
   * with prediction score greater than or equal to this threshold is identified as positive,
   * and negative otherwise. The default value is 0.0.
   */
  @Since("1.0.0")
-  @Experimental
  def setThreshold(threshold: Double): this.type = {
    this.threshold = Some(threshold)
    this
  }

  /**
-   * :: Experimental ::
   * Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
   */
  @Since("1.3.0")
-  @Experimental
  def getThreshold: Option[Double] = threshold

  /**
-   * :: Experimental ::
   * Clears the threshold so that `predict` will output raw prediction scores.
   */
  @Since("1.0.0")
-  @Experimental
  def clearThreshold(): this.type = {
    threshold = None
    this
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionWithSGD.scala
index 75630054d1..47bff5ebdd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionWithSGD.scala
@@ -17,12 +17,11 @@

package org.apache.spark.mllib.classification

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.StreamingLinearAlgorithm

/**
- * :: Experimental ::
 * Train or predict a logistic regression model on streaming data. Training uses
 * Stochastic Gradient Descent to update the model based on each new batch of
 * incoming data from a DStream (see `LogisticRegressionWithSGD` for model equation)
@@ -43,7 +42,6 @@ import org.apache.spark.mllib.regression.StreamingLinearAlgorithm
 *   .trainOn(DStream)
 * }}}
 */
-@Experimental
@Since("1.3.0")
class StreamingLogisticRegressionWithSGD private[mllib] (
    private var stepSize: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
index f82bd82c20..7b203e2f40 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.IndexedSeq

import breeze.linalg.{diag, DenseMatrix => BreezeMatrix, DenseVector => BDV, Vector => BV}

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, Matrices, Vector, Vectors}
import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -30,8 +30,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.util.Utils

/**
- * :: Experimental ::
- *
 * This class performs expectation maximization for multivariate Gaussian
 * Mixture Models (GMMs). A GMM represents a composite distribution of
 * independent Gaussian distributions with associated "mixing" weights
@@ -52,7 +50,6 @@ import org.apache.spark.util.Utils
 * is considered to have occurred.
 * @param maxIterations The maximum number of iterations to perform
 */
-@Experimental
@Since("1.3.0")
class GaussianMixture private (
    private var k: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index a5902190d4..2115f7d99c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -24,7 +24,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.{Vector, Matrices, Matrix}
import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
@@ -33,8 +33,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SQLContext, Row}

/**
- * :: Experimental ::
- *
 * Multivariate Gaussian Mixture Model (GMM) consisting of k Gaussians, where points
 * are drawn from each Gaussian i=1..k with probability w(i); mu(i) and sigma(i) are
 * the respective mean and covariance for each Gaussian distribution i=1..k.
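(Aside on the API graduated here: as the Scaladoc above describes, a GMM is parameterized by mixing weights w(i) and per-component Gaussians with mean mu(i) and covariance sigma(i). A brief usage sketch, assuming a live SparkContext named sc and toy data; illustrative only, not part of this diff:

    import org.apache.spark.mllib.clustering.GaussianMixture
    import org.apache.spark.mllib.linalg.Vectors

    // Toy one-dimensional data with two rough clusters.
    val data = sc.parallelize(Seq(0.9, 1.1, 4.8, 5.2).map(x => Vectors.dense(x)))

    // Fit a two-component mixture with EM.
    val gmm = new GaussianMixture().setK(2).run(data)

    // weights(i) is w(i); gaussians(i) carries mu(i) and sigma(i).
    gmm.weights.zip(gmm.gaussians).foreach { case (w, g) =>
      println(s"weight=$w mean=${g.mu} cov=${g.sigma}")
    }
)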
@@ -45,7 +43,6 @@ import org.apache.spark.sql.{SQLContext, Row}
 *              the Multivariate Gaussian (Normal) Distribution for Gaussian i
 */
@Since("1.3.0")
-@Experimental
class GaussianMixtureModel @Since("1.3.0") (
    @Since("1.3.0") val weights: Array[Double],
    @Since("1.3.0") val gaussians: Array[MultivariateGaussian]) extends Serializable with Saveable {
@@ -132,7 +129,6 @@ class GaussianMixtureModel @Since("1.3.0") (
}

@Since("1.4.0")
-@Experimental
object GaussianMixtureModel extends Loader[GaussianMixtureModel] {

  private object SaveLoadV1_0 {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
index 92a321afb0..eb802a365e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
import breeze.linalg.{DenseVector => BDV}

import org.apache.spark.Logging
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.api.java.JavaPairRDD
import org.apache.spark.graphx._
import org.apache.spark.mllib.linalg.{Vector, Vectors}
@@ -28,8 +28,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.util.Utils

/**
- * :: Experimental ::
- *
 * Latent Dirichlet Allocation (LDA), a topic model designed for text documents.
 *
 * Terminology:
@@ -45,7 +43,6 @@ import org.apache.spark.util.Utils
 *       (Wikipedia)]]
 */
@Since("1.3.0")
-@Experimental
class LDA private (
    private var k: Int,
    private var maxIterations: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 15129e0dd5..31d8a9fdea 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -35,14 +35,11 @@ import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.util.BoundedPriorityQueue

/**
- * :: Experimental ::
- *
 * Latent Dirichlet Allocation (LDA) model.
 *
 * This abstraction permits different underlying representations,
 * including local and distributed data structures.
 */
-@Experimental
@Since("1.3.0")
abstract class LDAModel private[clustering] extends Saveable {
@@ -184,15 +181,12 @@
}

/**
- * :: Experimental ::
- *
 * Local LDA model.
 * This model stores only the inferred topics.
 * It may be used for computing topics for new documents, but it may give less accurate answers
 * than the [[DistributedLDAModel]].
 * @param topics Inferred topics (vocabSize x k matrix).
 */
-@Experimental
@Since("1.3.0")
class LocalLDAModel private[clustering] (
    @Since("1.3.0") val topics: Matrix,
@@ -481,14 +475,11 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
}

/**
- * :: Experimental ::
- *
 * Distributed LDA model.
 * This model stores the inferred topics, the full training dataset, and the topic distributions.
 * When computing topics for new documents, it may give more accurate answers
 * than the [[LocalLDAModel]].
 */
-@Experimental
@Since("1.3.0")
class DistributedLDAModel private[clustering] (
    private[clustering] val graph: Graph[LDA.TopicCounts, LDA.TokenCount],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
index 6c76e26fd1..7cd9b08fa8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -21,7 +21,7 @@ import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.graphx._
import org.apache.spark.graphx.impl.GraphImpl
@@ -33,15 +33,12 @@ import org.apache.spark.util.random.XORShiftRandom
import org.apache.spark.{Logging, SparkContext, SparkException}

/**
- * :: Experimental ::
- *
 * Model produced by [[PowerIterationClustering]].
 *
 * @param k number of clusters
 * @param assignments an RDD of clustering [[PowerIterationClustering#Assignment]]s
 */
@Since("1.3.0")
-@Experimental
class PowerIterationClusteringModel @Since("1.3.0") (
    @Since("1.3.0") val k: Int,
    @Since("1.3.0") val assignments: RDD[PowerIterationClustering.Assignment])
@@ -107,8 +104,6 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
}

/**
- * :: Experimental ::
- *
 * Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by
 * [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]]. From the abstract: PIC finds a very
 * low-dimensional embedding of a dataset using truncated power iteration on a normalized pair-wise
@@ -120,7 +115,6 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
 *
 * @see [[http://en.wikipedia.org/wiki/Spectral_clustering Spectral clustering (Wikipedia)]]
 */
-@Experimental
@Since("1.3.0")
class PowerIterationClustering private[clustering] (
    private var k: Int,
@@ -239,17 +233,14 @@ class PowerIterationClustering private[clustering] (
}

@Since("1.3.0")
-@Experimental
object PowerIterationClustering extends Logging {

  /**
-   * :: Experimental ::
   * Cluster assignment.
   * @param id node id
   * @param cluster assigned cluster id
   */
  @Since("1.3.0")
-  @Experimental
  case class Assignment(id: Long, cluster: Int)

  /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
index 1d50ffec96..80843719f5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.clustering
import scala.reflect.ClassTag

import org.apache.spark.Logging
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaSparkContext._
import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.rdd.RDD
@@ -30,8 +30,6 @@ import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom

/**
- * :: Experimental ::
- *
 * StreamingKMeansModel extends MLlib's KMeansModel for streaming
 * algorithms, so it can keep track of a continuously updated weight
 * associated with each cluster, and also update the model by
@@ -65,7 +63,6 @@ import org.apache.spark.util.random.XORShiftRandom
 * as batches or points.
 */
@Since("1.2.0")
-@Experimental
class StreamingKMeansModel @Since("1.2.0") (
    @Since("1.2.0") override val clusterCenters: Array[Vector],
    @Since("1.2.0") val clusterWeights: Array[Double])
@@ -149,8 +146,6 @@ class StreamingKMeansModel @Since("1.2.0") (
}

/**
- * :: Experimental ::
- *
 * StreamingKMeans provides methods for configuring a
 * streaming k-means analysis, training the model on streaming data,
 * and using the model to make predictions on streaming data.
@@ -168,7 +163,6 @@ class StreamingKMeansModel @Since("1.2.0") (
 * }}}
 */
@Since("1.2.0")
-@Experimental
class StreamingKMeans @Since("1.2.0") (
    @Since("1.2.0") var k: Int,
    @Since("1.2.0") var decayFactor: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index 508fe532b1..12cf220957 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -17,15 +17,13 @@

package org.apache.spark.mllib.evaluation

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.Logging
-import org.apache.spark.SparkContext._
import org.apache.spark.mllib.evaluation.binary._
import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.sql.DataFrame

/**
- * :: Experimental ::
 * Evaluator for binary classification.
 *
 * @param scoreAndLabels an RDD of (score, label) pairs.
@@ -43,7 +41,6 @@
 * partition boundaries.
*/ @Since("1.0.0") -@Experimental class BinaryClassificationMetrics @Since("1.3.0") ( @Since("1.3.0") val scoreAndLabels: RDD[(Double, Double)], @Since("1.3.0") val numBins: Int) extends Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 00e837661d..c5104960cf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -19,8 +19,7 @@ package org.apache.spark.mllib.evaluation import scala.collection.Map -import org.apache.spark.SparkContext._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.{Matrices, Matrix} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame @@ -32,7 +31,6 @@ import org.apache.spark.sql.DataFrame * @param predictionAndLabels an RDD of (prediction, label) pairs. */ @Since("1.1.0") -@Experimental class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double, Double)]) { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index a7f43f0b11..cc01936dd3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import scala.reflect.ClassTag import org.apache.spark.Logging -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.{JavaSparkContext, JavaRDD} import org.apache.spark.rdd.RDD @@ -36,7 +36,6 @@ import org.apache.spark.rdd.RDD * @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs. */ @Since("1.2.0") -@Experimental class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]) extends Logging with Serializable { @@ -159,7 +158,6 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] } -@Experimental object RankingMetrics { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala index 799ebb980e..1d8f4fe340 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.evaluation -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors @@ -25,13 +25,11 @@ import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Multivariate import org.apache.spark.sql.DataFrame /** - * :: Experimental :: * Evaluator for regression. * * @param predictionAndObservations an RDD of (prediction, observation) pairs. 
*/ @Since("1.2.0") -@Experimental class RegressionMetrics @Since("1.2.0") ( predictionAndObservations: RDD[(Double, Double)]) extends Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index 5246faf221..d4d022afde 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -23,7 +23,7 @@ import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.stat.Statistics @@ -33,13 +33,11 @@ import org.apache.spark.SparkContext import org.apache.spark.sql.{SQLContext, Row} /** - * :: Experimental :: * Chi Squared selector model. * * @param selectedFeatures list of indices to select (filter). Must be ordered asc */ @Since("1.3.0") -@Experimental class ChiSqSelectorModel @Since("1.3.0") ( @Since("1.3.0") val selectedFeatures: Array[Int]) extends VectorTransformer with Saveable { @@ -173,7 +171,6 @@ object ChiSqSelectorModel extends Loader[ChiSqSelectorModel] { } /** - * :: Experimental :: * Creates a ChiSquared feature selector. * @param numTopFeatures number of features that selector will select * (ordered by statistic value descending) @@ -181,7 +178,6 @@ object ChiSqSelectorModel extends Loader[ChiSqSelectorModel] { * select all features. */ @Since("1.3.0") -@Experimental class ChiSqSelector @Since("1.3.0") ( @Since("1.3.0") val numTopFeatures: Int) extends Serializable { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala index d0a6cf6168..c757fc7f06 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -17,18 +17,16 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg._ /** - * :: Experimental :: * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a * provided "weight" vector. In other words, it scales each column of the dataset by a scalar * multiplier. * @param scalingVec The values used to scale the reference vector's individual components. 
*/ @Since("1.4.0") -@Experimental class ElementwiseProduct @Since("1.4.0") ( @Since("1.4.0") val scalingVec: Vector) extends VectorTransformer { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala index e47d524b61..c93ed64183 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala @@ -22,20 +22,18 @@ import java.lang.{Iterable => JavaIterable} import scala.collection.JavaConverters._ import scala.collection.mutable -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD import org.apache.spark.util.Utils /** - * :: Experimental :: * Maps a sequence of terms to their term frequencies using the hashing trick. * * @param numFeatures number of features (default: 2^20^) */ @Since("1.1.0") -@Experimental class HashingTF(val numFeatures: Int) extends Serializable { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala index 68078ccfa3..cffa9fba05 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala @@ -19,13 +19,12 @@ package org.apache.spark.mllib.feature import breeze.linalg.{DenseVector => BDV} -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.rdd.RDD /** - * :: Experimental :: * Inverse document frequency (IDF). * The standard formulation is used: `idf = log((m + 1) / (d(t) + 1))`, where `m` is the total * number of documents and `d(t)` is the number of documents that contain term `t`. @@ -38,7 +37,6 @@ import org.apache.spark.rdd.RDD * should appear for filtering */ @Since("1.1.0") -@Experimental class IDF @Since("1.2.0") (@Since("1.2.0") val minDocFreq: Int) { @Since("1.1.0") @@ -159,10 +157,8 @@ private object IDF { } /** - * :: Experimental :: * Represents an IDF model that can transform term frequency vectors. */ -@Experimental @Since("1.1.0") class IDFModel private[spark] (@Since("1.1.0") val idf: Vector) extends Serializable { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala index 8d5a22520d..af0c8e1d8a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala @@ -17,11 +17,10 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} /** - * :: Experimental :: * Normalizes samples individually to unit L^p^ norm * * For any 1 <= p < Double.PositiveInfinity, normalizes samples using @@ -32,7 +31,6 @@ import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors * @param p Normalization in L^p^ space, p = 2 by default. 
*/ @Since("1.1.0") -@Experimental class Normalizer @Since("1.1.0") (p: Double) extends VectorTransformer { @Since("1.1.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index f018b453ba..6fe573c528 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -18,13 +18,12 @@ package org.apache.spark.mllib.feature import org.apache.spark.Logging -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.rdd.RDD /** - * :: Experimental :: * Standardizes features by removing the mean and scaling to unit std using column summary * statistics on the samples in the training set. * @@ -33,7 +32,6 @@ import org.apache.spark.rdd.RDD * @param withStd True by default. Scales the data to unit standard deviation. */ @Since("1.1.0") -@Experimental class StandardScaler @Since("1.1.0") (withMean: Boolean, withStd: Boolean) extends Logging { @Since("1.1.0") @@ -64,7 +62,6 @@ class StandardScaler @Since("1.1.0") (withMean: Boolean, withStd: Boolean) exten } /** - * :: Experimental :: * Represents a StandardScaler model that can transform vectors. * * @param std column standard deviation values @@ -73,7 +70,6 @@ class StandardScaler @Since("1.1.0") (withMean: Boolean, withStd: Boolean) exten * @param withMean whether to center the data before scaling */ @Since("1.1.0") -@Experimental class StandardScalerModel @Since("1.3.0") ( @Since("1.3.0") val std: Vector, @Since("1.1.0") val mean: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 58857c338f..f3e4d346e3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -31,15 +31,14 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD -import org.apache.spark.mllib.linalg.{Vector, Vectors, DenseMatrix, BLAS, DenseVector} +import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd._ import org.apache.spark.util.Utils import org.apache.spark.util.random.XORShiftRandom -import org.apache.spark.sql.{SQLContext, Row} +import org.apache.spark.sql.SQLContext /** * Entry in vocabulary @@ -53,7 +52,6 @@ private case class VocabWord( ) /** - * :: Experimental :: * Word2Vec creates vector representation of words in a text corpus. * The algorithm first constructs a vocabulary from the corpus * and then learns vector representation of words in the vocabulary. @@ -71,7 +69,6 @@ private case class VocabWord( * Distributed Representations of Words and Phrases and their Compositionality. 
*/ @Since("1.1.0") -@Experimental class Word2Vec extends Serializable with Logging { private var vectorSize = 100 @@ -427,7 +424,6 @@ class Word2Vec extends Serializable with Logging { } /** - * :: Experimental :: * Word2Vec model * @param wordIndex maps each word to an index, which can retrieve the corresponding * vector from wordVectors @@ -435,7 +431,6 @@ class Word2Vec extends Serializable with Logging { * to the word mapped with index i can be retrieved by the slice * (i * vectorSize, i * vectorSize + vectorSize) */ -@Experimental @Since("1.1.0") class Word2VecModel private[mllib] ( private val wordIndex: Map[String, Int], @@ -558,7 +553,6 @@ class Word2VecModel private[mllib] ( } @Since("1.4.0") -@Experimental object Word2VecModel extends Loader[Word2VecModel] { private def buildWordIndex(model: Map[String, Array[Float]]): Map[String, Int] = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala index aea5c4f8a8..70ef1ed30c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import scala.reflect.ClassTag import org.apache.spark.{HashPartitioner, Logging, Partitioner, SparkException} -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.mllib.fpm.FPGrowth._ @@ -33,15 +33,11 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel /** - * :: Experimental :: - * * Model trained by [[FPGrowth]], which holds frequent itemsets. * @param freqItemsets frequent itemset, which is an RDD of [[FreqItemset]] * @tparam Item item type - * */ @Since("1.3.0") -@Experimental class FPGrowthModel[Item: ClassTag] @Since("1.3.0") ( @Since("1.3.0") val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable { /** @@ -56,8 +52,6 @@ class FPGrowthModel[Item: ClassTag] @Since("1.3.0") ( } /** - * :: Experimental :: - * * A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in * [[http://dx.doi.org/10.1145/1454008.1454027 Li et al., PFP: Parallel FP-Growth for Query * Recommendation]]. PFP distributes computation in such a way that each worker executes an @@ -74,7 +68,6 @@ class FPGrowthModel[Item: ClassTag] @Since("1.3.0") ( * */ @Since("1.3.0") -@Experimental class FPGrowth private ( private var minSupport: Double, private var numPartitions: Int) extends Logging with Serializable { @@ -213,12 +206,7 @@ class FPGrowth private ( } } -/** - * :: Experimental :: - * - */ @Since("1.3.0") -@Experimental object FPGrowth { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala index 4dcf8f28f2..4591cb88ef 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala @@ -20,11 +20,9 @@ package org.apache.spark.mllib.linalg import org.apache.spark.annotation.{Experimental, Since} /** - * :: Experimental :: * Represents singular value decomposition (SVD) factors. 
*/ @Since("1.0.0") -@Experimental case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType) /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 81a6c0550b..09527dcf5d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import breeze.linalg.{DenseMatrix => BDM} import org.apache.spark.{Logging, Partitioner, SparkException} -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Matrix, SparseMatrix} import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel @@ -115,8 +115,6 @@ private[mllib] object GridPartitioner { } /** - * :: Experimental :: - * * Represents a distributed matrix in blocks of local matrices. * * @param blocks The RDD of sub-matrix blocks ((blockRowIndex, blockColIndex), sub-matrix) that @@ -132,7 +130,6 @@ private[mllib] object GridPartitioner { * zero, the number of columns will be calculated when `numCols` is invoked. */ @Since("1.3.0") -@Experimental class BlockMatrix @Since("1.3.0") ( @Since("1.3.0") val blocks: RDD[((Int, Int), Matrix)], @Since("1.3.0") val rowsPerBlock: Int, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 644f293d88..8a70f34e70 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -19,23 +19,20 @@ package org.apache.spark.mllib.linalg.distributed import breeze.linalg.{DenseMatrix => BDM} -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} /** - * :: Experimental :: * Represents an entry in an distributed matrix. * @param i row index * @param j column index * @param value value of the entry */ @Since("1.0.0") -@Experimental case class MatrixEntry(i: Long, j: Long, value: Double) /** - * :: Experimental :: * Represents a matrix in coordinate format. * * @param entries matrix entries @@ -45,7 +42,6 @@ case class MatrixEntry(i: Long, j: Long, value: Double) * columns will be determined by the max column index plus one. 
*/ @Since("1.0.0") -@Experimental class CoordinateMatrix @Since("1.0.0") ( @Since("1.0.0") val entries: RDD[MatrixEntry], private var nRows: Long, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index b20ea0dc50..e6af0c0ec7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -19,21 +19,18 @@ package org.apache.spark.mllib.linalg.distributed import breeze.linalg.{DenseMatrix => BDM} -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.linalg.SingularValueDecomposition /** - * :: Experimental :: * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]]. */ @Since("1.0.0") -@Experimental case class IndexedRow(index: Long, vector: Vector) /** - * :: Experimental :: * Represents a row-oriented [[org.apache.spark.mllib.linalg.distributed.DistributedMatrix]] with * indexed rows. * @@ -44,7 +41,6 @@ case class IndexedRow(index: Long, vector: Vector) * columns will be determined by the size of the first row. */ @Since("1.0.0") -@Experimental class IndexedRowMatrix @Since("1.0.0") ( @Since("1.0.0") val rows: RDD[IndexedRow], private var nRows: Long, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index b8a7adceb1..52c0f19c64 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -26,8 +26,7 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BS import breeze.numerics.{sqrt => brzSqrt} import org.apache.spark.Logging -import org.apache.spark.SparkContext._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.stat.{MultivariateOnlineSummarizer, MultivariateStatisticalSummary} import org.apache.spark.rdd.RDD @@ -35,7 +34,6 @@ import org.apache.spark.util.random.XORShiftRandom import org.apache.spark.storage.StorageLevel /** - * :: Experimental :: * Represents a row-oriented distributed Matrix with no meaningful row indices. * * @param rows rows stored as an RDD[Vector] @@ -45,7 +43,6 @@ import org.apache.spark.storage.StorageLevel * columns will be determined by the size of the first row. 
*/ @Since("1.0.0") -@Experimental class RowMatrix @Since("1.0.0") ( @Since("1.0.0") val rows: RDD[Vector], private var nRows: Long, @@ -676,7 +673,6 @@ class RowMatrix @Since("1.0.0") ( } @Since("1.0.0") -@Experimental object RowMatrix { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala index 41d7c4d355..b0a716936a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.random import scala.reflect.ClassTag import org.apache.spark.SparkContext -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD, JavaSparkContext} import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.mllib.linalg.Vector @@ -29,10 +29,8 @@ import org.apache.spark.rdd.RDD import org.apache.spark.util.Utils /** - * :: Experimental :: * Generator methods for creating RDDs comprised of `i.i.d.` samples from some distribution. */ -@Experimental @Since("1.1.0") object RandomRDDs { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 877d31ba41..ec78ea2453 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -29,7 +29,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.util.{Loader, Saveable} @@ -37,8 +37,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext /** - * :: Experimental :: - * * Regression model for isotonic regression. * * @param boundaries Array of boundaries for which predictions are known. @@ -49,7 +47,6 @@ import org.apache.spark.sql.SQLContext * */ @Since("1.3.0") -@Experimental class IsotonicRegressionModel @Since("1.3.0") ( @Since("1.3.0") val boundaries: Array[Double], @Since("1.3.0") val predictions: Array[Double], @@ -233,8 +230,6 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { } /** - * :: Experimental :: - * * Isotonic regression. * Currently implemented using parallelized pool adjacent violators algorithm. * Only univariate (single feature) algorithm supported. 
@@ -252,7 +247,6 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
 *
 * @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]]
 */
-@Experimental
@Since("1.3.0")
class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index 0e72d6591c..a95a54225a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -19,13 +19,12 @@ package org.apache.spark.mllib.regression

import org.json4s.{DefaultFormats, JValue}

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD

@Since("0.8.0")
-@Experimental
trait RegressionModel extends Serializable {
  /**
   * Predict values for the given data set using the model trained.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index fe1d487cdd..fe2a46b9ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -17,11 +17,10 @@

package org.apache.spark.mllib.regression

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.linalg.Vector

/**
- * :: Experimental ::
 * Train or predict a linear regression model on streaming data. Training uses
 * Stochastic Gradient Descent to update the model based on each new batch of
 * incoming data from a DStream (see `LinearRegressionWithSGD` for model equation)
@@ -40,7 +39,6 @@ import org.apache.spark.mllib.linalg.Vector
 *   .setInitialWeights(Vectors.dense(...))
 *   .trainOn(DStream)
 */
-@Experimental
@Since("1.1.0")
class StreamingLinearRegressionWithSGD private[mllib] (
    private var stepSize: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
index 4a856f7f34..f253963270 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
@@ -19,12 +19,11 @@ package org.apache.spark.mllib.stat

import com.github.fommil.netlib.BLAS.{getInstance => blas}

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd.RDD

/**
- * :: Experimental ::
 * Kernel density estimation. Given a sample from a population, estimate its probability density
 * function at each of the given evaluation points using kernels. Only the Gaussian kernel is supported.
 *
@@ -39,7 +38,6 @@ import org.apache.spark.rdd.RDD
 * }}}
 */
@Since("1.4.0")
-@Experimental
class KernelDensity extends Serializable {

  import KernelDensity._
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
index 84d64a5bfb..bcb33a7a04 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat

import scala.annotation.varargs

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.{JavaRDD, JavaDoubleRDD}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.mllib.linalg.{Matrix, Vector}
@@ -30,11 +30,9 @@ import org.apache.spark.mllib.stat.test.{ChiSqTest, ChiSqTestResult, KolmogorovS
import org.apache.spark.rdd.RDD

/**
- * :: Experimental ::
 * API for statistical functions in MLlib.
 */
@Since("1.1.0")
-@Experimental
object Statistics {

  /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
index b0916d3e84..8a29fd39a9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
@@ -20,11 +20,9 @@ package org.apache.spark.mllib.stat.test
import org.apache.spark.annotation.{Experimental, Since}

/**
- * :: Experimental ::
 * Trait for hypothesis test results.
 * @tparam DF Return type of `degreesOfFreedom`.
 */
-@Experimental
@Since("1.1.0")
trait TestResult[DF] {

@@ -79,10 +77,8 @@
}

/**
- * :: Experimental ::
 * Object containing the test results for the chi-squared hypothesis test.
 */
-@Experimental
@Since("1.1.0")
class ChiSqTestResult private[stat] (override val pValue: Double,
    @Since("1.1.0") override val degreesOfFreedom: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
index 53d6482f80..af1f7e74c0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -21,7 +21,7 @@ import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.spark.Logging
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.RandomForest.NodeIndexInfo
@@ -36,7 +36,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.util.random.XORShiftRandom

/**
- * :: Experimental ::
 * A class which implements a decision tree learning algorithm for classification and regression.
 * It supports both continuous and categorical features.
 * @param strategy The configuration parameters for the tree algorithm which specify the type
 *                 of algorithm (classification, regression, etc.), feature type (continuous,
 *                 categorical), depth of the tree, quantile calculation strategy, etc.
*/ @Since("1.0.0") -@Experimental class DecisionTree @Since("1.0.0") (private val strategy: Strategy) extends Serializable with Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index 66a07e3136..729a211574 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree import org.apache.spark.Logging -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.impl.PeriodicRDDCheckpointer import org.apache.spark.mllib.regression.LabeledPoint @@ -31,7 +31,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel /** - * :: Experimental :: * A class that implements * [[http://en.wikipedia.org/wiki/Gradient_boosting Stochastic Gradient Boosting]] * for regression and binary classification. @@ -50,7 +49,6 @@ import org.apache.spark.storage.StorageLevel * @param boostingStrategy Parameters for the gradient boosting algorithm. */ @Since("1.2.0") -@Experimental class GradientBoostedTrees @Since("1.2.0") (private val boostingStrategy: BoostingStrategy) extends Serializable with Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 63a902f3eb..a684cdd18c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import scala.collection.JavaConverters._ import org.apache.spark.Logging -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.configuration.Strategy @@ -39,7 +39,6 @@ import org.apache.spark.util.Utils import org.apache.spark.util.random.SamplingUtils /** - * :: Experimental :: * A class that implements a [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] * learning algorithm for classification and regression. * It supports both continuous and categorical features. @@ -66,7 +65,6 @@ import org.apache.spark.util.random.SamplingUtils * to "onethird" for regression. * @param seed Random seed for bootstrapping and choosing feature subsets. 
 */
-@Experimental
private class RandomForest (
    private val strategy: Strategy,
    private val numTrees: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala
index fc13bcfd8e..d2513a9d5c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala
@@ -19,12 +19,11 @@ package org.apache.spark.mllib.tree.configuration

import scala.beans.BeanProperty

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.tree.configuration.Algo._
import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss}

/**
- * :: Experimental ::
 * Configuration options for [[org.apache.spark.mllib.tree.GradientBoostedTrees]].
 *
 * @param treeStrategy Parameters for the tree algorithm. We support regression and binary
@@ -47,7 +46,6 @@
 *                     [[org.apache.spark.mllib.tree.GradientBoostedTrees.run()]] is used.
 */
@Since("1.2.0")
-@Experimental
case class BoostingStrategy @Since("1.4.0") (
    // Required boosting parameters
    @Since("1.2.0") @BeanProperty var treeStrategy: Strategy,
@@ -79,7 +77,6 @@ case class BoostingStrategy @Since("1.4.0") (
}

@Since("1.2.0")
-@Experimental
object BoostingStrategy {

  /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala
index 4e0cd473de..1470295d8a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala
@@ -17,14 +17,12 @@

package org.apache.spark.mllib.tree.configuration

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since

/**
- * :: Experimental ::
 * Enum to describe whether a feature is "continuous" or "categorical"
 */
@Since("1.0.0")
-@Experimental
object FeatureType extends Enumeration {
  @Since("1.0.0")
  type FeatureType = Value
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala
index 8262db8a4f..1c16f136eb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala
@@ -17,14 +17,12 @@

package org.apache.spark.mllib.tree.configuration

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since

/**
- * :: Experimental ::
 * Enum for selecting the quantile calculation strategy
 */
@Since("1.0.0")
-@Experimental
object QuantileStrategy extends Enumeration {
  @Since("1.0.0")
  type QuantileStrategy = Value
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
index 89cc13b7c0..372d6617a4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala
@@ -20,13 +20,12 @@ package org.apache.spark.mllib.tree.configuration

import scala.beans.BeanProperty
import scala.collection.JavaConverters._

-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.mllib.tree.impurity.{Variance, Entropy, Gini, Impurity}
import org.apache.spark.mllib.tree.configuration.Algo._
import org.apache.spark.mllib.tree.configuration.QuantileStrategy._

/**
- * :: Experimental ::
 * Stores all the configuration options for tree construction
 * @param algo Learning goal. Supported:
 *             [[org.apache.spark.mllib.tree.configuration.Algo.Classification]],
@@ -68,7 +67,6 @@
 *                        [[org.apache.spark.SparkContext]], this setting is ignored.
 */
@Since("1.0.0")
-@Experimental
class Strategy @Since("1.3.0") (
    @Since("1.0.0") @BeanProperty var algo: Algo,
    @Since("1.0.0") @BeanProperty var impurity: Impurity,
@@ -179,7 +177,6 @@ class Strategy @Since("1.3.0") (
}

@Since("1.2.0")
-@Experimental
object Strategy {

  /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/TimeTracker.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/TimeTracker.scala
index aac84243d5..70afaa162b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/TimeTracker.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/TimeTracker.scala
@@ -19,12 +19,9 @@ package org.apache.spark.mllib.tree.impl

import scala.collection.mutable.{HashMap => MutableHashMap}

-import org.apache.spark.annotation.Experimental
-
/**
 * Time tracker implementation which holds labeled timers.
 */
-@Experimental
private[spark] class TimeTracker extends Serializable {

  private val starts: MutableHashMap[String, Long] = new MutableHashMap[String, Long]()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index e1bf23f4c3..54c136aecf 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -24,7 +24,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.{Logging, SparkContext}
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.tree.configuration.{Algo, FeatureType}
@@ -35,14 +35,12 @@ import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.util.Utils

/**
- * :: Experimental ::
 * Decision tree model for classification or regression.
 * This model stores the decision tree structure and parameters.
 * @param topNode root node
 * @param algo algorithm type -- classification or regression
 */
@Since("1.0.0")
-@Experimental
class DecisionTreeModel @Since("1.0.0") (
    @Since("1.0.0") val topNode: Node,
    @Since("1.0.0") val algo: Algo) extends Serializable with Saveable {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
index df5b8feab5..90e032e3d9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.{Logging, SparkContext}
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -38,16 +38,13 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.util.Utils

-
/**
- * :: Experimental ::
 * Represents a random forest model.
 *
 * @param algo algorithm for the ensemble model, either Classification or Regression
 * @param trees tree ensembles
 */
@Since("1.2.0")
-@Experimental
class RandomForestModel @Since("1.2.0") (
    @Since("1.2.0") override val algo: Algo,
    @Since("1.2.0") override val trees: Array[DecisionTreeModel])
@@ -108,7 +105,6 @@ object RandomForestModel extends Loader[RandomForestModel] {
}

/**
- * :: Experimental ::
 * Represents a gradient boosted trees model.
 *
 * @param algo algorithm for the ensemble model, either Classification or Regression
@@ -116,7 +112,6 @@ object RandomForestModel extends Loader[RandomForestModel] {
 * @param treeWeights tree ensemble weights
 */
@Since("1.2.0")
-@Experimental
class GradientBoostedTreesModel @Since("1.2.0") (
    @Since("1.2.0") override val algo: Algo,
    @Since("1.2.0") override val trees: Array[DecisionTreeModel],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 81c2f0ce6e..414ea99cfd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -19,9 +19,7 @@ package org.apache.spark.mllib.util

import scala.reflect.ClassTag

-import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
-
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.PartitionwiseSampledRDD
@@ -30,8 +28,6 @@ import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg.{SparseVector, DenseVector, Vector, Vectors}
import org.apache.spark.mllib.linalg.BLAS.dot
import org.apache.spark.storage.StorageLevel
-import org.apache.spark.streaming.StreamingContext
-import org.apache.spark.streaming.dstream.DStream

/**
 * Helper methods to load, save and pre-process data used in MLlib.
@@ -263,13 +259,11 @@ object MLUtils {
  }

  /**
-   * :: Experimental ::
   * Returns a k-element array of pairs of RDDs with the first element of each pair
   * containing the training data (a complement of the validation data) and the second
   * element, the validation data, containing a unique 1/kth of the data, where k = numFolds.
*/ @Since("1.0.0") - @Experimental def kFold[T: ClassTag](rdd: RDD[T], numFolds: Int, seed: Int): Array[(RDD[T], RDD[T])] = { val numFoldsF = numFolds.toFloat (1 to numFolds).map { fold => |