author     Joseph K. Bradley <joseph@databricks.com>    2016-07-13 12:33:39 -0700
committer  Joseph K. Bradley <joseph@databricks.com>    2016-07-13 12:33:39 -0700
commit     01f09b161217193b797c8c85969d17054c958615 (patch)
tree       40d7d4f5932157f8e0f0c13228dd18063728d4d3 /mllib/src/main
parent     d8220c1e5e94abbdb9643672b918f0d748206db9 (diff)
[SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
## What changes were proposed in this pull request?

General decisions to follow, except where noted:

* spark.mllib, pyspark.mllib: Remove all Experimental annotations. Leave DeveloperApi annotations alone.
* spark.ml, pyspark.ml
** Annotate Estimator-Model pairs of classes and companion objects the same way.
** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation.
** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation.
* DeveloperApi annotations are left alone, except where noted.
* No changes to which types are sealed.

Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new:

* Model Summary classes
* MLWriter, MLReader, MLWritable, MLReadable
* Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency.
* RFormula: Its behavior may need to change slightly to match R in edge cases.
* AFTSurvivalRegression
* MultilayerPerceptronClassifier

DeveloperApi changes:

* ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi

## How was this patch tested?

N/A

Note to reviewers:

* spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental.
* Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature. I did not find such cases, but please verify.

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #14147 from jkbradley/experimental-audit.
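For context, the annotation pattern this audit touches looks like the sketch below. It is illustrative only (the `MyTransformerBefore`/`MyTransformerAfter` classes are hypothetical, not part of this patch); it shows how `@Since`, `@Experimental`, and `@DeveloperApi` from `org.apache.spark.annotation` sit on spark.ml classes, and what removing `@Experimental` (together with the `:: Experimental ::` Scaladoc marker) leaves behind.

```scala
import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}

/**
 * :: Experimental ::
 * Hypothetical class, used only to illustrate the pre-patch annotation pattern.
 */
@Since("1.4.0")
@Experimental
class MyTransformerBefore @Since("1.4.0") (@Since("1.4.0") val uid: String)

/**
 * Hypothetical class after the audit: algorithms with a Since tag <= 1.6 drop
 * @Experimental and the Scaladoc marker, while @Since (and any @DeveloperApi
 * tags, except where noted) stay in place.
 */
@Since("1.4.0")
class MyTransformerAfter @Since("1.4.0") (@Since("1.4.0") val uid: String)
```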
Diffstat (limited to 'mllib/src/main')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 6
-rwxr-xr-x  mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/param/params.scala | 9
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala | 5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala | 4
60 files changed, 63 insertions, 271 deletions
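To make concrete what graduating out of Experimental means for callers of these APIs, here is a minimal usage sketch of `Pipeline`, `Tokenizer`, `HashingTF`, and `LogisticRegression`, all of which lose `@Experimental` in this patch. The training data and column names are made up for illustration; this snippet is not part of the patch itself.

```scala
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("annotation-audit-sketch").getOrCreate()

// Toy training data: (id, text, label), purely illustrative.
val training = spark.createDataFrame(Seq(
  (0L, "a b c d e spark", 1.0),
  (1L, "b d", 0.0),
  (2L, "spark f g h", 1.0),
  (3L, "hadoop mapreduce", 0.0)
)).toDF("id", "text", "label")

// Chain the now-stable stages into a Pipeline and fit it.
val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
val hashingTF = new HashingTF().setInputCol("words").setOutputCol("features")
val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.001)
val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))

val model = pipeline.fit(training)  // returns a PipelineModel, also no longer Experimental
```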
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index a1d08b3a6e..d18fb69799 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -27,7 +27,7 @@ import org.json4s._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.param.{Param, ParamMap, Params}
import org.apache.spark.ml.util._
@@ -78,7 +78,6 @@ abstract class PipelineStage extends Params with Logging {
}
/**
- * :: Experimental ::
* A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each
* of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline#fit]] is called, the
* stages are executed in order. If a stage is an [[Estimator]], its [[Estimator#fit]] method will
@@ -90,7 +89,6 @@ abstract class PipelineStage extends Params with Logging {
* an identity transformer.
*/
@Since("1.2.0")
-@Experimental
class Pipeline @Since("1.4.0") (
@Since("1.4.0") override val uid: String) extends Estimator[PipelineModel] with MLWritable {
@@ -282,11 +280,9 @@ object Pipeline extends MLReadable[Pipeline] {
}
/**
- * :: Experimental ::
* Represents a fitted pipeline.
*/
@Since("1.2.0")
-@Experimental
class PipelineModel private[ml] (
@Since("1.4.0") override val uid: String,
@Since("1.4.0") val stages: Array[Transformer])
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index c65d3d5b54..082848c9de 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -21,7 +21,7 @@ import org.apache.hadoop.fs.Path
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
import org.apache.spark.ml.param.ParamMap
@@ -36,14 +36,12 @@ import org.apache.spark.sql.Dataset
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm
* for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
@Since("1.4.0")
-@Experimental
class DecisionTreeClassifier @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
extends ProbabilisticClassifier[Vector, DecisionTreeClassifier, DecisionTreeClassificationModel]
@@ -127,7 +125,6 @@ class DecisionTreeClassifier @Since("1.4.0") (
}
@Since("1.4.0")
-@Experimental
object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifier] {
/** Accessor for supported impurities: entropy, gini */
@Since("1.4.0")
@@ -138,13 +135,11 @@ object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifi
}
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
@Since("1.4.0")
-@Experimental
class DecisionTreeClassificationModel private[ml] (
@Since("1.4.0")override val uid: String,
@Since("1.4.0")override val rootNode: Node,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index 4e534baddc..5946a12933 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -21,7 +21,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.feature.LabeledPoint
@@ -40,7 +40,6 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DoubleType
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* learning algorithm for classification.
* It supports binary labels, as well as both continuous and categorical features.
@@ -57,7 +56,6 @@ import org.apache.spark.sql.types.DoubleType
* [https://issues.apache.org/jira/browse/SPARK-4240]
*/
@Since("1.4.0")
-@Experimental
class GBTClassifier @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
extends Predictor[Vector, GBTClassifier, GBTClassificationModel]
@@ -149,7 +147,6 @@ class GBTClassifier @Since("1.4.0") (
}
@Since("1.4.0")
-@Experimental
object GBTClassifier extends DefaultParamsReadable[GBTClassifier] {
/** Accessor for supported loss settings: logistic */
@@ -161,7 +158,6 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] {
}
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* model for classification.
* It supports binary labels, as well as both continuous and categorical features.
@@ -171,7 +167,6 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] {
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
@Since("1.6.0")
-@Experimental
class GBTClassificationModel private[ml](
@Since("1.6.0") override val uid: String,
private val _trees: Array[DecisionTreeRegressionModel],
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 9c9f5ced4e..e157bdeb5b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -151,13 +151,11 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
}
/**
- * :: Experimental ::
* Logistic regression.
* Currently, this class only supports binary classification. It will support multiclass
* in the future.
*/
@Since("1.2.0")
-@Experimental
class LogisticRegression @Since("1.2.0") (
@Since("1.4.0") override val uid: String)
extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel]
@@ -475,11 +473,9 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] {
}
/**
- * :: Experimental ::
* Model produced by [[LogisticRegression]].
*/
@Since("1.4.0")
-@Experimental
class LogisticRegressionModel private[spark] (
@Since("1.4.0") override val uid: String,
@Since("2.0.0") val coefficients: Vector,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
index c99ae30155..ab977c8802 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.classification
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.linalg._
import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators}
@@ -63,7 +63,6 @@ private[ml] trait NaiveBayesParams extends PredictorParams {
}
/**
- * :: Experimental ::
* Naive Bayes Classifiers.
* It supports both Multinomial NB
* ([[http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html]])
@@ -74,7 +73,6 @@ private[ml] trait NaiveBayesParams extends PredictorParams {
* The input feature values must be nonnegative.
*/
@Since("1.5.0")
-@Experimental
class NaiveBayes @Since("1.5.0") (
@Since("1.5.0") override val uid: String)
extends ProbabilisticClassifier[Vector, NaiveBayes, NaiveBayesModel]
@@ -121,14 +119,12 @@ object NaiveBayes extends DefaultParamsReadable[NaiveBayes] {
}
/**
- * :: Experimental ::
* Model produced by [[NaiveBayes]]
* @param pi log of class priors, whose dimension is C (number of classes)
* @param theta log of class conditional probabilities, whose dimension is C (number of classes)
* by D (number of features)
*/
@Since("1.5.0")
-@Experimental
class NaiveBayesModel private[ml] (
@Since("1.5.0") override val uid: String,
@Since("2.0.0") val pi: Vector,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 047a378b79..f4ab0a074c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -29,7 +29,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml._
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.linalg.Vector
@@ -117,7 +117,6 @@ private[ml] object OneVsRestParams extends ClassifierTypeTrait {
}
/**
- * :: Experimental ::
* Model produced by [[OneVsRest]].
* This stores the models resulting from training k binary classifiers: one for each class.
* Each example is scored against all k models, and the model with the highest score
@@ -130,7 +129,6 @@ private[ml] object OneVsRestParams extends ClassifierTypeTrait {
* (taking label 0).
*/
@Since("1.4.0")
-@Experimental
final class OneVsRestModel private[ml] (
@Since("1.4.0") override val uid: String,
private[ml] val labelMetadata: Metadata,
@@ -260,8 +258,6 @@ object OneVsRestModel extends MLReadable[OneVsRestModel] {
}
/**
- * :: Experimental ::
- *
* Reduction of Multiclass Classification to Binary Classification.
* Performs reduction using one against all strategy.
* For a multiclass classification with k classes, train k models (one per class).
@@ -269,7 +265,6 @@ object OneVsRestModel extends MLReadable[OneVsRestModel] {
* is picked to label the example.
*/
@Since("1.4.0")
-@Experimental
final class OneVsRest @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
extends Estimator[OneVsRestModel] with OneVsRestParams with MLWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index 9a26a5c5b1..4ab132e5f2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.classification
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
import org.apache.spark.ml.param.ParamMap
@@ -36,14 +36,12 @@ import org.apache.spark.sql.functions._
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for
* classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
@Since("1.4.0")
-@Experimental
class RandomForestClassifier @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
extends ProbabilisticClassifier[Vector, RandomForestClassifier, RandomForestClassificationModel]
@@ -124,7 +122,6 @@ class RandomForestClassifier @Since("1.4.0") (
}
@Since("1.4.0")
-@Experimental
object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifier] {
/** Accessor for supported impurity settings: entropy, gini */
@Since("1.4.0")
@@ -140,7 +137,6 @@ object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifi
}
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
@@ -149,7 +145,6 @@ object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifi
* Warning: These have null parents.
*/
@Since("1.4.0")
-@Experimental
class RandomForestClassificationModel private[ml] (
@Since("1.5.0") override val uid: String,
private val _trees: Array[DecisionTreeClassificationModel],
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
index fa9634fdfa..2b0862c60f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import scala.collection.mutable.ArrayBuilder
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.BinaryAttribute
import org.apache.spark.ml.linalg._
@@ -31,10 +31,8 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
/**
- * :: Experimental ::
* Binarize a column of continuous features given a threshold.
*/
-@Experimental
@Since("1.4.0")
final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index caffc39e2b..100d9e7f6c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import java.{util => ju}
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Model
import org.apache.spark.ml.attribute.NominalAttribute
import org.apache.spark.ml.param._
@@ -31,10 +31,8 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
/**
- * :: Experimental ::
* `Bucketizer` maps a column of continuous features to a column of feature buckets.
*/
-@Experimental
@Since("1.4.0")
final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala
index 712634dffb..bd053e886f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml._
import org.apache.spark.ml.attribute.{AttributeGroup, _}
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
@@ -57,11 +57,9 @@ private[feature] trait ChiSqSelectorParams extends Params
}
/**
- * :: Experimental ::
* Chi-Squared feature selection, which selects categorical features to use for predicting a
* categorical label.
*/
-@Experimental
@Since("1.6.0")
final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String)
extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable {
@@ -116,10 +114,8 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] {
}
/**
- * :: Experimental ::
* Model fitted by [[ChiSqSelector]].
*/
-@Experimental
@Since("1.6.0")
final class ChiSqSelectorModel private[ml] (
@Since("1.6.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
index 96e6f1c512..6299f74a6b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
@@ -18,7 +18,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.linalg.{Vectors, VectorUDT}
@@ -116,10 +116,8 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit
}
/**
- * :: Experimental ::
* Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]].
*/
-@Experimental
@Since("1.5.0")
class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable {
@@ -201,11 +199,9 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] {
}
/**
- * :: Experimental ::
* Converts a text document to a sparse vector of token counts.
* @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted.
*/
-@Experimental
@Since("1.5.0")
class CountVectorizerModel(
@Since("1.5.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
index 9605145e12..6ff36b35ca 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import edu.emory.mathcs.jtransforms.dct._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
import org.apache.spark.ml.param.BooleanParam
@@ -27,7 +27,6 @@ import org.apache.spark.ml.util._
import org.apache.spark.sql.types.DataType
/**
- * :: Experimental ::
* A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero
* padding is performed on the input vector.
* It returns a real vector of the same length representing the DCT. The return vector is scaled
@@ -35,7 +34,6 @@ import org.apache.spark.sql.types.DataType
*
* More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]].
*/
-@Experimental
@Since("1.5.0")
class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
index d07833e580..f860b3a787 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param.Param
@@ -27,12 +27,10 @@ import org.apache.spark.mllib.linalg.VectorImplicits._
import org.apache.spark.sql.types.DataType
/**
- * :: Experimental ::
* Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a
* provided "weight" vector. In other words, it scales each column of the dataset by a scalar
* multiplier.
*/
-@Experimental
@Since("1.4.0")
class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
index 6ca7336cd0..a8792a35ff 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.param._
@@ -29,7 +29,6 @@ import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.{ArrayType, StructType}
/**
- * :: Experimental ::
* Maps a sequence of terms to their term frequencies using the hashing trick.
* Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32)
* to calculate the hash code value for the term object.
@@ -37,7 +36,6 @@ import org.apache.spark.sql.types.{ArrayType, StructType}
* it is advisable to use a power of two as the numFeatures parameter;
* otherwise the features will not be mapped evenly to the columns.
*/
-@Experimental
@Since("1.2.0")
class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 5d6287f0e3..6386dd8a10 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml._
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param._
@@ -61,10 +61,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
}
/**
- * :: Experimental ::
* Compute the Inverse Document Frequency (IDF) given a collection of documents.
*/
-@Experimental
@Since("1.4.0")
final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Estimator[IDFModel] with IDFBase with DefaultParamsWritable {
@@ -111,10 +109,8 @@ object IDF extends DefaultParamsReadable[IDF] {
}
/**
- * :: Experimental ::
* Model fitted by [[IDF]].
*/
-@Experimental
@Since("1.4.0")
class IDFModel private[ml] (
@Since("1.4.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
index dca28b5c5d..7b11f86279 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import scala.collection.mutable.ArrayBuilder
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
@@ -32,7 +32,6 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
/**
- * :: Experimental ::
* Implements the feature interaction transform. This transformer takes in Double and Vector type
* columns and outputs a flattened vector of their feature interactions. To handle interaction,
* we first one-hot encode any nominal features. Then, a vector of the feature cross-products is
@@ -42,7 +41,6 @@ import org.apache.spark.sql.types._
* `Vector(6, 8)` if all input features were numeric. If the first feature was instead nominal
* with four categories, the output would then be `Vector(0, 0, 0, 0, 3, 4, 0, 0)`.
*/
-@Experimental
@Since("1.6.0")
class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer
with HasInputCols with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala
index f7f1d42039..6cefa7086c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala
@@ -23,6 +23,8 @@ import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.linalg.Vector
/**
+ * :: Experimental ::
+ *
* Class that represents the features and labels of a data point.
*
* @param label Label for this data point.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index d5ad5abced..7b03f0c0f3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
import org.apache.spark.ml.param.{DoubleParam, ParamMap, Params}
@@ -74,7 +74,6 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
}
/**
- * :: Experimental ::
* Rescale each feature individually to a common range [min, max] linearly using column summary
* statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
* feature E is calculated as,
@@ -85,7 +84,6 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
* Note that since zero values will probably be transformed to non-zero values, output of the
* transformer will be DenseVector even for sparse input.
*/
-@Experimental
@Since("1.5.0")
class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[MinMaxScalerModel] with MinMaxScalerParams with DefaultParamsWritable {
@@ -138,7 +136,6 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] {
}
/**
- * :: Experimental ::
* Model fitted by [[MinMaxScaler]].
*
* @param originalMin min value for each original column during fitting
@@ -146,7 +143,6 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] {
*
* TODO: The transformer does not yet set the metadata in the output column (SPARK-8529).
*/
-@Experimental
@Since("1.5.0")
class MinMaxScalerModel private[ml] (
@Since("1.5.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
index 9c1f1ad443..4463aea009 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
@@ -17,14 +17,13 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param._
import org.apache.spark.ml.util._
import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
/**
- * :: Experimental ::
* A feature transformer that converts the input array of strings into an array of n-grams. Null
* values in the input array are ignored.
* It returns an array of n-grams where each n-gram is represented by a space-separated string of
@@ -34,7 +33,6 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
* When the input array length is less than n (number of elements per n-gram), no n-grams are
* returned.
*/
-@Experimental
@Since("1.5.0")
class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends UnaryTransformer[Seq[String], Seq[String], NGram] with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
index f9cbad90c9..eb06900580 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
@@ -27,10 +27,8 @@ import org.apache.spark.mllib.linalg.{Vectors => OldVectors}
import org.apache.spark.sql.types.DataType
/**
- * :: Experimental ::
* Normalize a vector to have unit norm using the given p-norm.
*/
-@Experimental
@Since("1.4.0")
class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
index 01828ede6b..8b04b5de6f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.linalg.Vectors
@@ -29,7 +29,6 @@ import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.{DoubleType, NumericType, StructType}
/**
- * :: Experimental ::
* A one-hot encoder that maps a column of category indices to a column of binary vectors, with
* at most a single one-value per row that indicates the input category index.
* For example with 5 categories, an input value of 2.0 would map to an output vector of
@@ -42,7 +41,6 @@ import org.apache.spark.sql.types.{DoubleType, NumericType, StructType}
*
* @see [[StringIndexer]] for converting categorical values into category indices
*/
-@Experimental
@Since("1.4.0")
class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer
with HasInputCol with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index ef8b08545d..6b913480fd 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml._
import org.apache.spark.ml.linalg._
import org.apache.spark.ml.param._
@@ -59,12 +59,11 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
}
}
+
/**
- * :: Experimental ::
* PCA trains a model to project vectors to a lower dimensional space of the top [[PCA!.k]]
* principal components.
*/
-@Experimental
@Since("1.5.0")
class PCA @Since("1.5.0") (
@Since("1.5.0") override val uid: String)
@@ -116,14 +115,12 @@ object PCA extends DefaultParamsReadable[PCA] {
}
/**
- * :: Experimental ::
* Model fitted by [[PCA]]. Transforms vectors to a lower dimensional space.
*
* @param pc A principal components Matrix. Each column is one principal component.
* @param explainedVariance A vector of proportions of variance explained by
* each principal component.
*/
-@Experimental
@Since("1.5.0")
class PCAModel private[ml] (
@Since("1.5.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 7b35fdeaf4..72fb35bd79 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import scala.collection.mutable
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg._
import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators}
@@ -27,14 +27,12 @@ import org.apache.spark.ml.util._
import org.apache.spark.sql.types.DataType
/**
- * :: Experimental ::
* Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion,
* which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an
* expansion of a product of sums expresses it as a sum of products by using the fact that
* multiplication distributes over addition". Take a 2-variable feature vector as an example:
* `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.
*/
-@Experimental
@Since("1.4.0")
class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
index 96b8e7d9f7..9a636bd8a5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml._
import org.apache.spark.ml.attribute.NominalAttribute
@@ -64,7 +64,6 @@ private[feature] trait QuantileDiscretizerBase extends Params
}
/**
- * :: Experimental ::
* `QuantileDiscretizer` takes a column with continuous features and outputs a column with binned
* categorical features. The number of bins can be set using the `numBuckets` parameter.
* The bin ranges are chosen using an approximate algorithm (see the documentation for
@@ -73,7 +72,6 @@ private[feature] trait QuantileDiscretizerBase extends Params
* `relativeError` parameter. The lower and upper bin bounds will be `-Infinity` and `+Infinity`,
* covering all real values.
*/
-@Experimental
@Since("1.6.0")
final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val uid: String)
extends Estimator[Bucketizer] with QuantileDiscretizerBase with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
index b8715746fe..289037640f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.util._
@@ -25,7 +25,6 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import org.apache.spark.sql.types.StructType
/**
- * :: Experimental ::
* Implements the transformations which are defined by SQL statement.
* Currently we only support SQL syntax like 'SELECT ... FROM __THIS__ ...'
* where '__THIS__' represents the underlying table of the input dataset.
@@ -37,7 +36,6 @@ import org.apache.spark.sql.types.StructType
* - SELECT a, SQRT(b) AS b_sqrt FROM __THIS__ where a > 5
* - SELECT a, b, SUM(c) AS c_sum FROM __THIS__ GROUP BY a, b
*/
-@Experimental
@Since("1.6.0")
class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer
with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index b4be95494f..2494cf51a2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml._
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param._
@@ -76,7 +76,6 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
}
/**
- * :: Experimental ::
* Standardizes features by removing the mean and scaling to unit variance using column summary
* statistics on the samples in the training set.
*
@@ -85,7 +84,6 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
* corrected sample standard deviation]],
* which is computed as the square root of the unbiased sample variance.
*/
-@Experimental
@Since("1.2.0")
class StandardScaler @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
@@ -138,13 +136,11 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] {
}
/**
- * :: Experimental ::
* Model fitted by [[StandardScaler]].
*
* @param std Standard deviation of the StandardScalerModel
* @param mean Mean of the StandardScalerModel
*/
-@Experimental
@Since("1.2.0")
class StandardScalerModel private[ml] (
@Since("1.4.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 1a6f42f773..666070037c 100755
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.{BooleanParam, ParamMap, StringArrayParam}
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
@@ -27,12 +27,10 @@ import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.{ArrayType, StringType, StructType}
/**
- * :: Experimental ::
* A feature transformer that filters out stop words from input.
* Note: null values from input array are preserved unless adding null to stopWords explicitly.
* @see [[http://en.wikipedia.org/wiki/Stop_words]]
*/
-@Experimental
@Since("1.5.0")
class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 028e540fe5..fe79e2ec80 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model, Transformer}
import org.apache.spark.ml.attribute.{Attribute, NominalAttribute}
import org.apache.spark.ml.param._
@@ -55,7 +55,6 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha
}
/**
- * :: Experimental ::
* A label indexer that maps a string column of labels to an ML column of label indices.
* If the input column is numeric, we cast it to string and index the string values.
* The indices are in [0, numLabels), ordered by label frequencies.
@@ -63,7 +62,6 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha
*
* @see [[IndexToString]] for the inverse transformation
*/
-@Experimental
@Since("1.4.0")
class StringIndexer @Since("1.4.0") (
@Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel]
@@ -112,7 +110,6 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] {
}
/**
- * :: Experimental ::
* Model fitted by [[StringIndexer]].
*
* NOTE: During transformation, if the input column does not exist,
@@ -121,7 +118,6 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] {
*
* @param labels Ordered list of labels, corresponding to indices to be assigned.
*/
-@Experimental
@Since("1.4.0")
class StringIndexerModel (
@Since("1.4.0") override val uid: String,
@@ -250,7 +246,6 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] {
}
/**
- * :: Experimental ::
* A [[Transformer]] that maps a column of indices back to a new column of corresponding
* string values.
* The index-string mapping is either from the ML attributes of the input column,
@@ -258,7 +253,6 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] {
*
* @see [[StringIndexer]] for converting strings into indices
*/
-@Experimental
@Since("1.5.0")
class IndexToString private[ml] (@Since("1.5.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 010c948749..45d8fa94a8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -17,19 +17,17 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param._
import org.apache.spark.ml.util._
import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
/**
- * :: Experimental ::
* A tokenizer that converts the input string to lowercase and then splits it by white spaces.
*
* @see [[RegexTokenizer]]
*/
-@Experimental
@Since("1.2.0")
class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable {
@@ -59,13 +57,11 @@ object Tokenizer extends DefaultParamsReadable[Tokenizer] {
}
/**
- * :: Experimental ::
* A regex based tokenizer that extracts tokens either by using the provided regex pattern to split
* the text (default) or repeatedly matching the regex (if `gaps` is false).
* Optional parameters also allow filtering tokens using a minimal length.
* It returns an array of strings that can be empty.
*/
-@Experimental
@Since("1.4.0")
class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends UnaryTransformer[String, Seq[String], RegexTokenizer] with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index 4939dabd98..142a2ae44c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import scala.collection.mutable.ArrayBuilder
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute}
import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
@@ -32,10 +32,8 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
/**
- * :: Experimental ::
* A feature transformer that merges multiple columns into a vector column.
*/
-@Experimental
@Since("1.4.0")
class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index 5656a9f979..d1a5c2e825 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -24,7 +24,7 @@ import scala.collection.JavaConverters._
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, VectorUDT}
@@ -59,7 +59,6 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
}
/**
- * :: Experimental ::
* Class for indexing categorical feature columns in a dataset of [[Vector]].
*
* This has 2 usage modes:
@@ -93,7 +92,6 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
* - Add warning if a categorical feature has only 1 category.
* - Add option for allowing unknown categories.
*/
-@Experimental
@Since("1.4.0")
class VectorIndexer @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
@@ -247,7 +245,6 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] {
}
/**
- * :: Experimental ::
* Model fitted by [[VectorIndexer]]. Transform categorical features to use 0-based indices
* instead of their original values.
* - Categorical features are mapped to indices.
@@ -263,7 +260,6 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] {
* Values are maps from original features values to 0-based category indices.
* If a feature is not in this map, it is treated as continuous.
*/
-@Experimental
@Since("1.4.0")
class VectorIndexerModel private[ml] (
@Since("1.4.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
index 6769e490c5..966ccb85d0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.{Attribute, AttributeGroup}
import org.apache.spark.ml.linalg._
@@ -29,7 +29,6 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.StructType
/**
- * :: Experimental ::
* This class takes a feature vector and outputs a new feature vector with a subarray of the
* original features.
*
@@ -40,7 +39,6 @@ import org.apache.spark.sql.types.StructType
* The output vector will order features with the selected indices first (in the order given),
* followed by the selected names (in the order given).
*/
-@Experimental
@Since("1.5.0")
final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index 0cac3fa2d7..c2b434c3d5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -19,8 +19,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors, VectorUDT}
import org.apache.spark.ml.param._
@@ -115,11 +114,9 @@ private[feature] trait Word2VecBase extends Params
}
/**
- * :: Experimental ::
* Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further
* natural language processing or machine learning process.
*/
-@Experimental
@Since("1.4.0")
final class Word2Vec @Since("1.4.0") (
@Since("1.4.0") override val uid: String)
@@ -202,10 +199,8 @@ object Word2Vec extends DefaultParamsReadable[Word2Vec] {
}
/**
- * :: Experimental ::
* Model fitted by [[Word2Vec]].
*/
-@Experimental
@Since("1.4.0")
class Word2VecModel private[ml] (
@Since("1.4.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index ecec61a72f..e7780cf1c3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -28,9 +28,9 @@ import scala.collection.JavaConverters._
import org.json4s._
import org.json4s.jackson.JsonMethods._
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
-import org.apache.spark.ml.linalg.{Vector, Vectors}
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.ml.linalg.JsonVectorConverter
+import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.util.Identifiable
/**
@@ -510,11 +510,9 @@ class IntArrayParam(parent: Params, name: String, doc: String, isValid: Array[In
}
/**
- * :: Experimental ::
* A param and its value.
*/
@Since("1.2.0")
-@Experimental
case class ParamPair[T] @Since("1.2.0") (
@Since("1.2.0") param: Param[T],
@Since("1.2.0") value: T) {
@@ -797,11 +795,9 @@ trait Params extends Identifiable with Serializable {
abstract class JavaParams extends Params
/**
- * :: Experimental ::
* A param to value map.
*/
@Since("1.2.0")
-@Experimental
final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any])
extends Serializable {
@@ -952,7 +948,6 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any])
}
@Since("1.2.0")
-@Experimental
object ParamMap {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 5dc2433e55..a2c4c26911 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -26,12 +26,12 @@ import scala.util.{Sorting, Try}
import scala.util.hashing.byteswap64
import com.github.fommil.netlib.BLAS.{getInstance => blas}
-import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.Path
import org.json4s.DefaultFormats
import org.json4s.JsonDSL._
import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext}
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.internal.Logging
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
@@ -222,14 +222,12 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
}
/**
- * :: Experimental ::
* Model fitted by ALS.
*
* @param rank rank of the matrix factorization model
* @param userFactors a DataFrame that stores user factors in two columns: `id` and `features`
* @param itemFactors a DataFrame that stores item factors in two columns: `id` and `features`
*/
-@Experimental
@Since("1.3.0")
class ALSModel private[ml] (
@Since("1.4.0") override val uid: String,
@@ -333,7 +331,6 @@ object ALSModel extends MLReadable[ALSModel] {
}
/**
- * :: Experimental ::
* Alternating Least Squares (ALS) matrix factorization.
*
* ALS attempts to estimate the ratings matrix `R` as the product of two lower-rank matrices,
@@ -362,7 +359,6 @@ object ALSModel extends MLReadable[ALSModel] {
* indicated user
* preferences rather than explicit ratings given to items.
*/
-@Experimental
@Since("1.3.0")
class ALS(@Since("1.4.0") override val uid: String) extends Estimator[ALSModel] with ALSParams
with DefaultParamsWritable {
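Editor's note: a short sketch of fitting the ALS estimator above, assuming a DataFrame `ratings` with columns "userId", "movieId" and "rating" (names are illustrative).

```scala
import org.apache.spark.ml.recommendation.ALS

val als = new ALS()
  .setMaxIter(10)
  .setRegParam(0.1)
  .setUserCol("userId")
  .setItemCol("movieId")
  .setRatingCol("rating")

val alsModel = als.fit(ratings)
val predictions = alsModel.transform(ratings)   // adds a "prediction" column
alsModel.userFactors.show(5)                    // DataFrame with `id` and `features` columns
```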
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 7ff6d0afd5..ebc6c12ddc 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -21,7 +21,7 @@ import org.apache.hadoop.fs.Path
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vector
@@ -38,13 +38,11 @@ import org.apache.spark.sql.functions._
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm
* for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
-@Experimental
class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
with DecisionTreeRegressorParams with DefaultParamsWritable {
@@ -125,7 +123,6 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
}
@Since("1.4.0")
-@Experimental
object DecisionTreeRegressor extends DefaultParamsReadable[DecisionTreeRegressor] {
/** Accessor for supported impurities: variance */
final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities
@@ -135,13 +132,11 @@ object DecisionTreeRegressor extends DefaultParamsReadable[DecisionTreeRegressor
}
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for regression.
* It supports both continuous and categorical features.
* @param rootNode Root of the decision tree
*/
@Since("1.4.0")
-@Experimental
class DecisionTreeRegressionModel private[ml] (
override val uid: String,
override val rootNode: Node,
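Editor's note: a minimal sketch of the regressor above, assuming a DataFrame `train` with "label" and "features" columns where categorical features already carry metadata (e.g. from VectorIndexer).

```scala
import org.apache.spark.ml.regression.DecisionTreeRegressor

val dt = new DecisionTreeRegressor()
  .setMaxDepth(5)
  .setImpurity("variance")   // variance is the supported impurity for regression

val dtModel = dt.fit(train)
println(dtModel.toDebugString)   // textual dump of the tree rooted at dtModel.rootNode
```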
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 6223555504..ce355938ec 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -38,7 +38,6 @@ import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* learning algorithm for regression.
* It supports both continuous and categorical features.
@@ -56,7 +55,6 @@ import org.apache.spark.sql.functions._
* [https://issues.apache.org/jira/browse/SPARK-4240]
*/
@Since("1.4.0")
-@Experimental
class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
with GBTRegressorParams with DefaultParamsWritable with Logging {
@@ -135,7 +133,6 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
}
@Since("1.4.0")
-@Experimental
object GBTRegressor extends DefaultParamsReadable[GBTRegressor] {
/** Accessor for supported loss settings: squared (L2), absolute (L1) */
@@ -147,8 +144,6 @@ object GBTRegressor extends DefaultParamsReadable[GBTRegressor] {
}
/**
- * :: Experimental ::
- *
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* model for regression.
* It supports both continuous and categorical features.
@@ -156,7 +151,6 @@ object GBTRegressor extends DefaultParamsReadable[GBTRegressor] {
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
@Since("1.4.0")
-@Experimental
class GBTRegressionModel private[ml](
override val uid: String,
private val _trees: Array[DecisionTreeRegressionModel],
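Editor's note: a similar sketch for the GBT regressor, under the same `train` assumption; the iteration count and loss are illustrative.

```scala
import org.apache.spark.ml.regression.GBTRegressor

val gbt = new GBTRegressor()
  .setMaxIter(50)            // number of boosting stages, i.e. trees in the ensemble
  .setMaxDepth(4)
  .setLossType("squared")    // "squared" (L2) or "absolute" (L1)

val gbtModel = gbt.fit(train)
println(s"learned ${gbtModel.trees.length} trees")
```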
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index 9b9429a328..35396446ed 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.regression
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
@@ -120,7 +120,6 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
}
/**
- * :: Experimental ::
* Isotonic regression.
*
* Currently implemented using parallelized pool adjacent violators algorithm.
@@ -129,7 +128,6 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
* Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
@Since("1.5.0")
-@Experimental
class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[IsotonicRegressionModel]
with IsotonicRegressionBase with DefaultParamsWritable {
@@ -192,7 +190,6 @@ object IsotonicRegression extends DefaultParamsReadable[IsotonicRegression] {
}
/**
- * :: Experimental ::
* Model fitted by IsotonicRegression.
* Predicts using a piecewise linear function.
*
@@ -202,7 +199,6 @@ object IsotonicRegression extends DefaultParamsReadable[IsotonicRegression] {
* model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
@Since("1.5.0")
-@Experimental
class IsotonicRegressionModel private[ml] (
override val uid: String,
private val oldModel: MLlibIsotonicRegressionModel)
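Editor's note: a minimal sketch of the isotonic regressor, assuming a DataFrame `df` with "label" and single-element "features" columns.

```scala
import org.apache.spark.ml.regression.IsotonicRegression

val ir = new IsotonicRegression()
  .setIsotonic(true)   // fit a non-decreasing function; false fits a non-increasing one

val irModel = ir.fit(df)
println(s"boundaries:  ${irModel.boundaries}")    // x-values of the piecewise-linear fit
println(s"predictions: ${irModel.predictions}")   // fitted y-values at those boundaries
irModel.transform(df).show()
```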
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 6b82ae14e1..c57e9eb0bf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -54,7 +54,6 @@ private[regression] trait LinearRegressionParams extends PredictorParams
with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver
/**
- * :: Experimental ::
* Linear regression.
*
* The learning objective is to minimize the squared error, with regularization.
@@ -68,7 +67,6 @@ private[regression] trait LinearRegressionParams extends PredictorParams
* - L2 + L1 (elastic net)
*/
@Since("1.3.0")
-@Experimental
class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String)
extends Regressor[Vector, LinearRegression, LinearRegressionModel]
with LinearRegressionParams with DefaultParamsWritable with Logging {
@@ -387,11 +385,9 @@ object LinearRegression extends DefaultParamsReadable[LinearRegression] {
}
/**
- * :: Experimental ::
* Model produced by [[LinearRegression]].
*/
@Since("1.3.0")
-@Experimental
class LinearRegressionModel private[ml] (
@Since("1.4.0") override val uid: String,
@Since("2.0.0") val coefficients: Vector,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index 4f4d3d2784..0ad00aa6f9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.regression
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vector
@@ -37,12 +37,10 @@ import org.apache.spark.sql.functions._
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
-@Experimental
class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
with RandomForestRegressorParams with DefaultParamsWritable {
@@ -118,7 +116,6 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
}
@Since("1.4.0")
-@Experimental
object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor]{
/** Accessor for supported impurity settings: variance */
@Since("1.4.0")
@@ -135,7 +132,6 @@ object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor
}
/**
- * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for regression.
* It supports both continuous and categorical features.
*
@@ -143,7 +139,6 @@ object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor
* @param numFeatures Number of features used by this model
*/
@Since("1.4.0")
-@Experimental
class RandomForestRegressionModel private[ml] (
override val uid: String,
private val _trees: Array[DecisionTreeRegressionModel],
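Editor's note: and a sketch for the random forest regressor, with illustrative settings and the same `train` assumption.

```scala
import org.apache.spark.ml.regression.RandomForestRegressor

val rf = new RandomForestRegressor()
  .setNumTrees(50)
  .setFeatureSubsetStrategy("auto")   // how many features to consider per split

val rfModel = rf.fit(train)
println(s"trees: ${rfModel.trees.length}")
println(s"feature importances: ${rfModel.featureImportances}")
```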
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
index d5e5c45460..8144bcb7d4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
@@ -17,17 +17,14 @@
package org.apache.spark.ml.tree
-import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.mllib.tree.impurity.ImpurityCalculator
import org.apache.spark.mllib.tree.model.{ImpurityStats,
InformationGainStats => OldInformationGainStats, Node => OldNode, Predict => OldPredict}
/**
- * :: DeveloperApi ::
* Decision tree node interface.
*/
-@DeveloperApi
sealed abstract class Node extends Serializable {
// TODO: Add aggregate stats (once available). This will happen after we move the DecisionTree
@@ -109,12 +106,10 @@ private[ml] object Node {
}
/**
- * :: DeveloperApi ::
* Decision tree leaf node.
* @param prediction Prediction this node makes
* @param impurity Impurity measure at this node (for training data)
*/
-@DeveloperApi
class LeafNode private[ml] (
override val prediction: Double,
override val impurity: Double,
@@ -147,7 +142,6 @@ class LeafNode private[ml] (
}
/**
- * :: DeveloperApi ::
* Internal Decision Tree node.
* @param prediction Prediction this node would make if it were a leaf node
* @param impurity Impurity measure at this node (for training data)
@@ -157,7 +151,6 @@ class LeafNode private[ml] (
* @param rightChild Right-hand child node
* @param split Information about the test used to split to the left or right child.
*/
-@DeveloperApi
class InternalNode private[ml] (
override val prediction: Double,
override val impurity: Double,
@@ -167,6 +160,9 @@ class InternalNode private[ml] (
val split: Split,
override private[ml] val impurityStats: ImpurityCalculator) extends Node {
+ // Note to developers: The constructor argument impurityStats should be reconsidered before we
+ // make the constructor public. We may be able to improve the representation.
+
override def toString: String = {
s"InternalNode(prediction = $prediction, impurity = $impurity, split = $split)"
}
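Editor's note: since Node, LeafNode and InternalNode are now plain public API rather than DeveloperApi, a small traversal sketch may help; it assumes a fitted `dtModel: DecisionTreeRegressionModel` such as the one in the DecisionTreeRegressor sketch above.

```scala
import org.apache.spark.ml.tree.{InternalNode, LeafNode, Node}

def describe(node: Node, indent: String = ""): Unit = node match {
  case leaf: LeafNode =>
    println(s"${indent}leaf: prediction = ${leaf.prediction}, impurity = ${leaf.impurity}")
  case internal: InternalNode =>
    println(s"${indent}split on feature ${internal.split.featureIndex}, gain = ${internal.gain}")
    describe(internal.leftChild, indent + "  ")
    describe(internal.rightChild, indent + "  ")
}

describe(dtModel.rootNode)
```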
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala
index 9704e15cd8..47fe3524f2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala
@@ -19,18 +19,16 @@ package org.apache.spark.ml.tree
import java.util.Objects
-import org.apache.spark.annotation.{DeveloperApi, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.mllib.tree.configuration.{FeatureType => OldFeatureType}
import org.apache.spark.mllib.tree.model.{Split => OldSplit}
/**
- * :: DeveloperApi ::
* Interface for a "Split," which specifies a test made at a decision tree node
* to choose the left or right path.
*/
-@DeveloperApi
sealed trait Split extends Serializable {
/** Index of feature which this split tests */
@@ -67,14 +65,12 @@ private[tree] object Split {
}
/**
- * :: DeveloperApi ::
* Split which tests a categorical feature.
* @param featureIndex Index of the feature to test
* @param _leftCategories If the feature value is in this set of categories, then the split goes
* left. Otherwise, it goes right.
* @param numCategories Number of categories for this feature.
*/
-@DeveloperApi
class CategoricalSplit private[ml] (
override val featureIndex: Int,
_leftCategories: Array[Double],
@@ -153,13 +149,11 @@ class CategoricalSplit private[ml] (
}
/**
- * :: DeveloperApi ::
* Split which tests a continuous feature.
* @param featureIndex Index of the feature to test
* @param threshold If the feature value is <= this threshold, then the split goes left.
* Otherwise, it goes right.
*/
-@DeveloperApi
class ContinuousSplit private[ml] (override val featureIndex: Int, val threshold: Double)
extends Split {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index 7d42da4a2f..520557849b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -25,7 +25,7 @@ import com.github.fommil.netlib.F2jBLAS
import org.apache.hadoop.fs.Path
import org.json4s.DefaultFormats
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml._
import org.apache.spark.ml.evaluation.Evaluator
@@ -55,11 +55,9 @@ private[ml] trait CrossValidatorParams extends ValidatorParams {
}
/**
- * :: Experimental ::
* K-fold cross validation.
*/
@Since("1.2.0")
-@Experimental
class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String)
extends Estimator[CrossValidatorModel]
with CrossValidatorParams with MLWritable with Logging {
@@ -190,7 +188,6 @@ object CrossValidator extends MLReadable[CrossValidator] {
}
/**
- * :: Experimental ::
* Model from k-fold cross validation.
*
* @param bestModel The best model selected from k-fold cross validation.
@@ -198,7 +195,6 @@ object CrossValidator extends MLReadable[CrossValidator] {
* [[CrossValidator.estimatorParamMaps]], in the corresponding order.
*/
@Since("1.2.0")
-@Experimental
class CrossValidatorModel private[ml] (
@Since("1.4.0") override val uid: String,
@Since("1.2.0") val bestModel: Model[_],
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
index 7d12f447f7..d369e7a61c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
@@ -20,15 +20,13 @@ package org.apache.spark.ml.tuning
import scala.annotation.varargs
import scala.collection.mutable
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._
/**
- * :: Experimental ::
* Builder for a param grid used in grid search-based model selection.
*/
@Since("1.2.0")
-@Experimental
class ParamGridBuilder @Since("1.2.0") {
private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
index f6f2bad401..0fdba1cb88 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
@@ -25,7 +25,7 @@ import scala.language.existentials
import org.apache.hadoop.fs.Path
import org.json4s.DefaultFormats
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.evaluation.Evaluator
@@ -54,14 +54,12 @@ private[ml] trait TrainValidationSplitParams extends ValidatorParams {
}
/**
- * :: Experimental ::
* Validation for hyper-parameter tuning.
* Randomly splits the input dataset into train and validation sets,
* and uses evaluation metric on the validation set to select the best model.
* Similar to [[CrossValidator]], but only splits the set once.
*/
@Since("1.5.0")
-@Experimental
class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[TrainValidationSplitModel]
with TrainValidationSplitParams with MLWritable with Logging {
@@ -188,7 +186,6 @@ object TrainValidationSplit extends MLReadable[TrainValidationSplit] {
}
/**
- * :: Experimental ::
* Model from train validation split.
*
* @param uid Id.
@@ -196,7 +193,6 @@ object TrainValidationSplit extends MLReadable[TrainValidationSplit] {
* @param validationMetrics Evaluated validation metrics.
*/
@Since("1.5.0")
-@Experimental
class TrainValidationSplitModel private[ml] (
@Since("1.5.0") override val uid: String,
@Since("1.5.0") val bestModel: Model[_],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index 91edcf2a79..f1664ce4ab 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -22,7 +22,7 @@ import java.util.Random
import scala.annotation.tailrec
import scala.collection.mutable
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.internal.Logging
import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
@@ -31,8 +31,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
/**
- * :: Experimental ::
- *
* A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
* by Steinbach, Karypis, and Kumar, with modification to fit Spark.
* The algorithm starts from a single cluster that contains all points.
@@ -54,7 +52,6 @@ import org.apache.spark.storage.StorageLevel
* KDD Workshop on Text Mining, 2000.]]
*/
@Since("1.6.0")
-@Experimental
class BisectingKMeans private (
private var k: Int,
private var maxIterations: Int,
@@ -398,8 +395,6 @@ private object BisectingKMeans extends Serializable {
}
/**
- * :: Experimental ::
- *
* Represents a node in a clustering tree.
*
* @param index node index, negative for internal nodes and non-negative for leaf nodes
@@ -411,7 +406,6 @@ private object BisectingKMeans extends Serializable {
* @param children children nodes
*/
@Since("1.6.0")
-@Experimental
private[clustering] class ClusteringTreeNode private[clustering] (
val index: Int,
val size: Long,
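Editor's note: a minimal RDD-based sketch of bisecting k-means, assuming a SparkContext `sc`; the points and value of k are illustrative.

```scala
import org.apache.spark.mllib.clustering.BisectingKMeans
import org.apache.spark.mllib.linalg.Vectors

val points = sc.parallelize(Seq(
  Vectors.dense(0.1, 0.1), Vectors.dense(0.3, 0.3),
  Vectors.dense(10.1, 10.1), Vectors.dense(10.3, 10.3)))

val bkm = new BisectingKMeans().setK(2)
val bkmModel = bkm.run(points)
println(s"compute cost: ${bkmModel.computeCost(points)}")
bkmModel.clusterCenters.zipWithIndex.foreach { case (center, idx) =>
  println(s"cluster $idx center: $center")
}
```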
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index 11fd940b8b..8438015cce 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -23,7 +23,7 @@ import org.json4s.jackson.JsonMethods._
import org.json4s.JsonDSL._
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.internal.Logging
import org.apache.spark.mllib.linalg.Vector
@@ -32,8 +32,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
/**
- * :: Experimental ::
- *
* Clustering model produced by [[BisectingKMeans]].
* The prediction is done level-by-level from the root node to a leaf node, and at each node among
* its children the closest to the input point is selected.
@@ -41,7 +39,6 @@ import org.apache.spark.sql.{Row, SparkSession}
* @param root the root node of the clustering tree
*/
@Since("1.6.0")
-@Experimental
class BisectingKMeansModel private[clustering] (
private[clustering] val root: ClusteringTreeNode
) extends Serializable with Saveable with Logging {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index d295826300..9ebba1de0d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId}
import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors}
@@ -426,13 +426,10 @@ class LocalLDAModel private[spark] (
}
/**
- * :: Experimental ::
- *
* Local (non-distributed) model fitted by [[LDA]].
*
* This model stores the inferred topics only; it does not store info about the training dataset.
*/
-@Experimental
@Since("1.5.0")
object LocalLDAModel extends Loader[LocalLDAModel] {
@@ -822,15 +819,12 @@ class DistributedLDAModel private[clustering] (
}
/**
- * :: Experimental ::
- *
* Distributed model fitted by [[LDA]].
* This type of model is currently only produced by Expectation-Maximization (EM).
*
* This model stores the inferred topics, the full training dataset, and the topic distribution
* for each training document.
*/
-@Experimental
@Since("1.5.0")
object DistributedLDAModel extends Loader[DistributedLDAModel] {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
index 9a63cc29da..3c26d26708 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.fpm
import scala.collection.JavaConverters._
import scala.reflect.ClassTag
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.internal.Logging
@@ -28,14 +28,11 @@ import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset
import org.apache.spark.rdd.RDD
/**
- * :: Experimental ::
- *
 * Generates association rules from a [[RDD[FreqItemset[Item]]]]. This method only generates
* association rules which have a single item as the consequent.
*
*/
@Since("1.5.0")
-@Experimental
class AssociationRules private[fpm] (
private var minConfidence: Double) extends Logging with Serializable {
@@ -95,8 +92,6 @@ class AssociationRules private[fpm] (
object AssociationRules {
/**
- * :: Experimental ::
- *
* An association rule between sets of items.
* @param antecedent hypotheses of the rule. Java users should call [[Rule#javaAntecedent]]
* instead.
@@ -106,7 +101,6 @@ object AssociationRules {
*
*/
@Since("1.5.0")
- @Experimental
class Rule[Item] private[fpm] (
@Since("1.5.0") val antecedent: Array[Item],
@Since("1.5.0") val consequent: Array[Item],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
index c13c794775..7382000791 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
@@ -30,7 +30,7 @@ import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, render}
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.internal.Logging
@@ -42,8 +42,6 @@ import org.apache.spark.sql.types._
import org.apache.spark.storage.StorageLevel
/**
- * :: Experimental ::
- *
* A parallel PrefixSpan algorithm to mine frequent sequential patterns.
* The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns
* Efficiently by Prefix-Projected Pattern Growth ([[http://doi.org/10.1109/ICDE.2001.914830]]).
@@ -60,7 +58,6 @@ import org.apache.spark.storage.StorageLevel
* @see [[https://en.wikipedia.org/wiki/Sequential_Pattern_Mining Sequential Pattern Mining
* (Wikipedia)]]
*/
-@Experimental
@Since("1.5.0")
class PrefixSpan private (
private var minSupport: Double,
@@ -230,7 +227,6 @@ class PrefixSpan private (
}
-@Experimental
@Since("1.5.0")
object PrefixSpan extends Logging {
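Editor's note: a minimal sketch of mining frequent sequential patterns with PrefixSpan, assuming a SparkContext `sc`; each input element is a sequence of itemsets.

```scala
import org.apache.spark.mllib.fpm.PrefixSpan

val sequences = sc.parallelize(Seq(
  Array(Array(1, 2), Array(3)),
  Array(Array(1), Array(3, 2), Array(1, 2)),
  Array(Array(1, 2), Array(5)),
  Array(Array(6))), 2).cache()

val prefixSpan = new PrefixSpan()
  .setMinSupport(0.5)
  .setMaxPatternLength(5)

val psModel = prefixSpan.run(sequences)
psModel.freqSequences.collect().foreach { fs =>
  println(s"${fs.sequence.map(_.mkString("[", ",", "]")).mkString} : ${fs.freq}")
}
```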
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
index 4591cb88ef..8024b1c003 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.linalg
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
/**
* Represents singular value decomposition (SVD) factors.
@@ -26,10 +26,8 @@ import org.apache.spark.annotation.{Experimental, Since}
case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType)
/**
- * :: Experimental ::
* Represents QR factors.
*/
@Since("1.5.0")
-@Experimental
case class QRDecomposition[QType, RType](Q: QType, R: RType)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
index 480a64548c..f372355005 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuffer
import breeze.linalg.{norm, DenseVector => BDV}
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.Logging
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.rdd.RDD
@@ -53,11 +53,9 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va
}
/**
- * :: Experimental ::
* Set fraction of data to be used for each SGD iteration.
* Default 1.0 (corresponding to deterministic/classical gradient descent)
*/
- @Experimental
def setMiniBatchFraction(fraction: Double): this.type = {
require(fraction > 0 && fraction <= 1.0,
s"Fraction for mini-batch SGD must be in range (0, 1] but got ${fraction}")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
index 274ac7c995..5d61796f1d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
@@ -23,7 +23,7 @@ import javax.xml.transform.stream.StreamResult
import org.jpmml.model.JAXBUtil
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
/**
@@ -45,20 +45,16 @@ trait PMMLExportable {
}
/**
- * :: Experimental ::
* Export the model to a local file in PMML format
*/
- @Experimental
@Since("1.4.0")
def toPMML(localPath: String): Unit = {
toPMML(new StreamResult(new File(localPath)))
}
/**
- * :: Experimental ::
* Export the model to a directory on a distributed file system in PMML format
*/
- @Experimental
@Since("1.4.0")
def toPMML(sc: SparkContext, path: String): Unit = {
val pmml = toPMML()
@@ -66,20 +62,16 @@ trait PMMLExportable {
}
/**
- * :: Experimental ::
* Export the model to the OutputStream in PMML format
*/
- @Experimental
@Since("1.4.0")
def toPMML(outputStream: OutputStream): Unit = {
toPMML(new StreamResult(outputStream))
}
/**
- * :: Experimental ::
* Export the model to a String in PMML format
*/
- @Experimental
@Since("1.4.0")
def toPMML(): String = {
val writer = new StringWriter
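Editor's note: the trait above is mixed into several mllib models; a small sketch using k-means, assuming a SparkContext `sc` (the paths are illustrative).

```scala
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

val data = sc.parallelize(Seq(Vectors.dense(1.0, 1.0), Vectors.dense(9.0, 9.0)))
val kmeansModel = KMeans.train(data, k = 2, maxIterations = 10)   // KMeansModel is PMMLExportable

println(kmeansModel.toPMML())            // PMML as a String
kmeansModel.toPMML("/tmp/kmeans.xml")    // local file
kmeansModel.toPMML(sc, "/tmp/kmeans")    // directory on a distributed file system
```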
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala
index 4c382d7c2b..97c032de7a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat.test
import scala.beans.BeanInfo
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.dstream.DStream
@@ -42,7 +42,6 @@ case class BinarySample @Since("1.6.0") (
}
/**
- * :: Experimental ::
* Performs online 2-sample significance testing for a stream of (Boolean, Double) pairs. The
* Boolean identifies which sample each observation comes from, and the Double is the numeric value
* of the observation.
@@ -67,7 +66,6 @@ case class BinarySample @Since("1.6.0") (
* .registerStream(DStream)
* }}}
*/
-@Experimental
@Since("1.6.0")
class StreamingTest @Since("1.6.0") () extends Logging with Serializable {
private var peacePeriod: Int = 0
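Editor's note: expanding the usage fragment in the scaladoc above into a compilable sketch; it assumes an input `observations: DStream[BinarySample]` built elsewhere in a streaming application.

```scala
import org.apache.spark.mllib.stat.test.{BinarySample, StreamingTest}
import org.apache.spark.streaming.dstream.DStream

def attachTest(observations: DStream[BinarySample]): Unit = {
  val streamingTest = new StreamingTest()
    .setPeacePeriod(0)        // initial batches to ignore
    .setWindowSize(0)         // 0 = test over all batches seen so far
    .setTestMethod("welch")   // or "student"

  val results = streamingTest.registerStream(observations)
  results.print()             // each batch yields a result with a p-value
}
```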
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
index 8a29fd39a9..5cfc05a3dd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.stat.test
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
/**
* Trait for hypothesis test results.
@@ -94,10 +94,8 @@ class ChiSqTestResult private[stat] (override val pValue: Double,
}
/**
- * :: Experimental ::
* Object containing the test results for the Kolmogorov-Smirnov test.
*/
-@Experimental
@Since("1.5.0")
class KolmogorovSmirnovTestResult private[stat] (
@Since("1.5.0") override val pValue: Double,
@@ -113,10 +111,8 @@ class KolmogorovSmirnovTestResult private[stat] (
}
/**
- * :: Experimental ::
* Object containing the test results for streaming testing.
*/
-@Experimental
@Since("1.6.0")
private[stat] class StreamingTestResult @Since("1.6.0") (
@Since("1.6.0") override val pValue: Double,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala
index 853c7319ec..2436ce4086 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala
@@ -17,14 +17,12 @@
package org.apache.spark.mllib.tree.configuration
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
/**
- * :: Experimental ::
* Enum to select the algorithm for the decision tree
*/
@Since("1.0.0")
-@Experimental
object Algo extends Enumeration {
@Since("1.0.0")
type Algo = Value
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
index 3a731f45d6..d4448da9ee 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala
@@ -17,14 +17,12 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
/**
- * :: Experimental ::
* Class for calculating entropy during multiclass classification.
*/
@Since("1.0.0")
-@Experimental
object Entropy extends Impurity {
private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
index 7730c0a8c1..22e70278a6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala
@@ -17,16 +17,14 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
/**
- * :: Experimental ::
* Class for calculating the
* [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]]
* during multiclass classification.
*/
@Since("1.0.0")
-@Experimental
object Gini extends Impurity {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
index 65f0163ec6..a5bdc2c6d2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
@@ -17,17 +17,15 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
/**
- * :: Experimental ::
* Trait for calculating information gain.
* This trait is used for
* (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]]
* (b) calculating impurity values from sufficient statistics.
*/
@Since("1.0.0")
-@Experimental
trait Impurity extends Serializable {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
index 2423516123..c9bf0db4de 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
@@ -17,14 +17,12 @@
package org.apache.spark.mllib.tree.impurity
-import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
/**
- * :: Experimental ::
* Class for calculating variance during regression
*/
@Since("1.0.0")
-@Experimental
object Variance extends Impurity {
/**