From 01f09b161217193b797c8c85969d17054c958615 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Wed, 13 Jul 2016 12:33:39 -0700 Subject: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML ## What changes were proposed in this pull request? General decisions to follow, except where noted: * spark.mllib, pyspark.mllib: Remove all Experimental annotations. Leave DeveloperApi annotations alone. * spark.ml, pyspark.ml ** Annotate Estimator-Model pairs of classes and companion objects the same way. ** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation. ** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation. * DeveloperApi annotations are left alone, except where noted. * No changes to which types are sealed. Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new: * Model Summary classes * MLWriter, MLReader, MLWritable, MLReadable * Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency. * RFormula: Its behavior may need to change slightly to match R in edge cases. * AFTSurvivalRegression * MultilayerPerceptronClassifier DeveloperApi changes: * ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi ## How was this patch tested? N/A Note to reviewers: * spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental. * Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature. I did not find such cases, but please verify. Author: Joseph K. Bradley Closes #14147 from jkbradley/experimental-audit. --- .../main/scala/org/apache/spark/ml/Pipeline.scala | 6 +- .../ml/classification/DecisionTreeClassifier.scala | 7 +-- .../spark/ml/classification/GBTClassifier.scala | 7 +-- .../ml/classification/LogisticRegression.scala | 4 -- .../spark/ml/classification/NaiveBayes.scala | 6 +- .../apache/spark/ml/classification/OneVsRest.scala | 7 +-- .../ml/classification/RandomForestClassifier.scala | 7 +-- .../org/apache/spark/ml/feature/Binarizer.scala | 4 +- .../org/apache/spark/ml/feature/Bucketizer.scala | 4 +- .../apache/spark/ml/feature/ChiSqSelector.scala | 6 +- .../apache/spark/ml/feature/CountVectorizer.scala | 6 +- .../scala/org/apache/spark/ml/feature/DCT.scala | 4 +- .../spark/ml/feature/ElementwiseProduct.scala | 4 +- .../org/apache/spark/ml/feature/HashingTF.scala | 4 +- .../scala/org/apache/spark/ml/feature/IDF.scala | 6 +- .../org/apache/spark/ml/feature/Interaction.scala | 4 +- .../org/apache/spark/ml/feature/LabeledPoint.scala | 2 + .../org/apache/spark/ml/feature/MinMaxScaler.scala | 6 +- .../scala/org/apache/spark/ml/feature/NGram.scala | 4 +- .../org/apache/spark/ml/feature/Normalizer.scala | 4 +- .../apache/spark/ml/feature/OneHotEncoder.scala | 4 +- .../scala/org/apache/spark/ml/feature/PCA.scala | 7 +-- .../spark/ml/feature/PolynomialExpansion.scala | 4 +- .../spark/ml/feature/QuantileDiscretizer.scala | 4 +- .../apache/spark/ml/feature/SQLTransformer.scala | 4 +- .../apache/spark/ml/feature/StandardScaler.scala | 6 +- .../apache/spark/ml/feature/StopWordsRemover.scala | 4 +- .../apache/spark/ml/feature/StringIndexer.scala | 8 +-- .../org/apache/spark/ml/feature/Tokenizer.scala | 6 +- .../apache/spark/ml/feature/VectorAssembler.scala | 4 +- .../apache/spark/ml/feature/VectorIndexer.scala | 6 +- .../org/apache/spark/ml/feature/VectorSlicer.scala | 4 +- .../org/apache/spark/ml/feature/Word2Vec.scala | 7 +-- .../scala/org/apache/spark/ml/param/params.scala | 9 +-- .../org/apache/spark/ml/recommendation/ALS.scala | 8 +-- .../ml/regression/DecisionTreeRegressor.scala | 7 +-- .../apache/spark/ml/regression/GBTRegressor.scala | 6 -- .../spark/ml/regression/IsotonicRegression.scala | 6 +- .../spark/ml/regression/LinearRegression.scala | 4 -- .../ml/regression/RandomForestRegressor.scala | 7 +-- .../main/scala/org/apache/spark/ml/tree/Node.scala | 10 +--- .../scala/org/apache/spark/ml/tree/Split.scala | 8 +-- .../apache/spark/ml/tuning/CrossValidator.scala | 6 +- .../apache/spark/ml/tuning/ParamGridBuilder.scala | 4 +- .../spark/ml/tuning/TrainValidationSplit.scala | 6 +- .../spark/mllib/clustering/BisectingKMeans.scala | 8 +-- .../mllib/clustering/BisectingKMeansModel.scala | 5 +- .../apache/spark/mllib/clustering/LDAModel.scala | 8 +-- .../apache/spark/mllib/fpm/AssociationRules.scala | 8 +-- .../org/apache/spark/mllib/fpm/PrefixSpan.scala | 6 +- .../mllib/linalg/SingularValueDecomposition.scala | 4 +- .../spark/mllib/optimization/GradientDescent.scala | 4 +- .../apache/spark/mllib/pmml/PMMLExportable.scala | 10 +--- .../spark/mllib/stat/test/StreamingTest.scala | 4 +- .../apache/spark/mllib/stat/test/TestResult.scala | 6 +- .../spark/mllib/tree/configuration/Algo.scala | 4 +- .../apache/spark/mllib/tree/impurity/Entropy.scala | 4 +- .../apache/spark/mllib/tree/impurity/Gini.scala | 4 +- .../spark/mllib/tree/impurity/Impurity.scala | 4 +- .../spark/mllib/tree/impurity/Variance.scala | 4 +- python/pyspark/ml/classification.py | 24 ++------ python/pyspark/ml/feature.py | 64 ---------------------- python/pyspark/ml/regression.py | 34 +++--------- python/pyspark/ml/tuning.py | 6 -- python/pyspark/mllib/classification.py | 6 -- python/pyspark/mllib/clustering.py | 16 ------ python/pyspark/mllib/feature.py | 16 ------ python/pyspark/mllib/fpm.py | 8 --- python/pyspark/mllib/linalg/__init__.py | 2 - python/pyspark/mllib/linalg/distributed.py | 14 ----- python/pyspark/mllib/stat/KernelDensity.py | 2 - python/pyspark/mllib/stat/_statistics.py | 4 -- python/pyspark/mllib/tree.py | 12 ---- 73 files changed, 74 insertions(+), 468 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index a1d08b3a6e..d18fb69799 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -27,7 +27,7 @@ import org.json4s._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.param.{Param, ParamMap, Params} import org.apache.spark.ml.util._ @@ -78,7 +78,6 @@ abstract class PipelineStage extends Params with Logging { } /** - * :: Experimental :: * A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline#fit]] is called, the * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator#fit]] method will @@ -90,7 +89,6 @@ abstract class PipelineStage extends Params with Logging { * an identity transformer. */ @Since("1.2.0") -@Experimental class Pipeline @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Estimator[PipelineModel] with MLWritable { @@ -282,11 +280,9 @@ object Pipeline extends MLReadable[Pipeline] { } /** - * :: Experimental :: * Represents a fitted pipeline. */ @Since("1.2.0") -@Experimental class PipelineModel private[ml] ( @Since("1.4.0") override val uid: String, @Since("1.4.0") val stages: Array[Transformer]) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index c65d3d5b54..082848c9de 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.Path import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamMap @@ -36,14 +36,12 @@ import org.apache.spark.sql.Dataset /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm * for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ @Since("1.4.0") -@Experimental class DecisionTreeClassifier @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends ProbabilisticClassifier[Vector, DecisionTreeClassifier, DecisionTreeClassificationModel] @@ -127,7 +125,6 @@ class DecisionTreeClassifier @Since("1.4.0") ( } @Since("1.4.0") -@Experimental object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifier] { /** Accessor for supported impurities: entropy, gini */ @Since("1.4.0") @@ -138,13 +135,11 @@ object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifi } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ @Since("1.4.0") -@Experimental class DecisionTreeClassificationModel private[ml] ( @Since("1.4.0")override val uid: String, @Since("1.4.0")override val rootNode: Node, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 4e534baddc..5946a12933 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -21,7 +21,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.feature.LabeledPoint @@ -40,7 +40,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.DoubleType /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * learning algorithm for classification. * It supports binary labels, as well as both continuous and categorical features. @@ -57,7 +56,6 @@ import org.apache.spark.sql.types.DoubleType * [https://issues.apache.org/jira/browse/SPARK-4240] */ @Since("1.4.0") -@Experimental class GBTClassifier @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Predictor[Vector, GBTClassifier, GBTClassificationModel] @@ -149,7 +147,6 @@ class GBTClassifier @Since("1.4.0") ( } @Since("1.4.0") -@Experimental object GBTClassifier extends DefaultParamsReadable[GBTClassifier] { /** Accessor for supported loss settings: logistic */ @@ -161,7 +158,6 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] { } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * model for classification. * It supports binary labels, as well as both continuous and categorical features. @@ -171,7 +167,6 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] { * @param _treeWeights Weights for the decision trees in the ensemble. */ @Since("1.6.0") -@Experimental class GBTClassificationModel private[ml]( @Since("1.6.0") override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 9c9f5ced4e..e157bdeb5b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -151,13 +151,11 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas } /** - * :: Experimental :: * Logistic regression. * Currently, this class only supports binary classification. It will support multiclass * in the future. */ @Since("1.2.0") -@Experimental class LogisticRegression @Since("1.2.0") ( @Since("1.4.0") override val uid: String) extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel] @@ -475,11 +473,9 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] { } /** - * :: Experimental :: * Model produced by [[LogisticRegression]]. */ @Since("1.4.0") -@Experimental class LogisticRegressionModel private[spark] ( @Since("1.4.0") override val uid: String, @Since("2.0.0") val coefficients: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index c99ae30155..ab977c8802 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.classification import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} @@ -63,7 +63,6 @@ private[ml] trait NaiveBayesParams extends PredictorParams { } /** - * :: Experimental :: * Naive Bayes Classifiers. * It supports both Multinomial NB * ([[http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html]]) @@ -74,7 +73,6 @@ private[ml] trait NaiveBayesParams extends PredictorParams { * The input feature values must be nonnegative. */ @Since("1.5.0") -@Experimental class NaiveBayes @Since("1.5.0") ( @Since("1.5.0") override val uid: String) extends ProbabilisticClassifier[Vector, NaiveBayes, NaiveBayesModel] @@ -121,14 +119,12 @@ object NaiveBayes extends DefaultParamsReadable[NaiveBayes] { } /** - * :: Experimental :: * Model produced by [[NaiveBayes]] * @param pi log of class priors, whose dimension is C (number of classes) * @param theta log of class conditional probabilities, whose dimension is C (number of classes) * by D (number of features) */ @Since("1.5.0") -@Experimental class NaiveBayesModel private[ml] ( @Since("1.5.0") override val uid: String, @Since("2.0.0") val pi: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 047a378b79..f4ab0a074c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -29,7 +29,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.Vector @@ -117,7 +117,6 @@ private[ml] object OneVsRestParams extends ClassifierTypeTrait { } /** - * :: Experimental :: * Model produced by [[OneVsRest]]. * This stores the models resulting from training k binary classifiers: one for each class. * Each example is scored against all k models, and the model with the highest score @@ -130,7 +129,6 @@ private[ml] object OneVsRestParams extends ClassifierTypeTrait { * (taking label 0). */ @Since("1.4.0") -@Experimental final class OneVsRestModel private[ml] ( @Since("1.4.0") override val uid: String, private[ml] val labelMetadata: Metadata, @@ -260,8 +258,6 @@ object OneVsRestModel extends MLReadable[OneVsRestModel] { } /** - * :: Experimental :: - * * Reduction of Multiclass Classification to Binary Classification. * Performs reduction using one against all strategy. * For a multiclass classification with k classes, train k models (one per class). @@ -269,7 +265,6 @@ object OneVsRestModel extends MLReadable[OneVsRestModel] { * is picked to label the example. */ @Since("1.4.0") -@Experimental final class OneVsRest @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Estimator[OneVsRestModel] with OneVsRestParams with MLWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index 9a26a5c5b1..4ab132e5f2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.classification import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamMap @@ -36,14 +36,12 @@ import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for * classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ @Since("1.4.0") -@Experimental class RandomForestClassifier @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends ProbabilisticClassifier[Vector, RandomForestClassifier, RandomForestClassificationModel] @@ -124,7 +122,6 @@ class RandomForestClassifier @Since("1.4.0") ( } @Since("1.4.0") -@Experimental object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifier] { /** Accessor for supported impurity settings: entropy, gini */ @Since("1.4.0") @@ -140,7 +137,6 @@ object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifi } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. @@ -149,7 +145,6 @@ object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifi * Warning: These have null parents. */ @Since("1.4.0") -@Experimental class RandomForestClassificationModel private[ml] ( @Since("1.5.0") override val uid: String, private val _trees: Array[DecisionTreeClassificationModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index fa9634fdfa..2b0862c60f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.BinaryAttribute import org.apache.spark.ml.linalg._ @@ -31,10 +31,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: Experimental :: * Binarize a column of continuous features given a threshold. */ -@Experimental @Since("1.4.0") final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index caffc39e2b..100d9e7f6c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import java.{util => ju} import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Model import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.param._ @@ -31,10 +31,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} /** - * :: Experimental :: * `Bucketizer` maps a column of continuous features to a column of feature buckets. */ -@Experimental @Since("1.4.0") final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index 712634dffb..bd053e886f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.attribute.{AttributeGroup, _} import org.apache.spark.ml.linalg.{Vector, VectorUDT} @@ -57,11 +57,9 @@ private[feature] trait ChiSqSelectorParams extends Params } /** - * :: Experimental :: * Chi-Squared feature selection, which selects categorical features to use for predicting a * categorical label. */ -@Experimental @Since("1.6.0") final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable { @@ -116,10 +114,8 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] { } /** - * :: Experimental :: * Model fitted by [[ChiSqSelector]]. */ -@Experimental @Since("1.6.0") final class ChiSqSelectorModel private[ml] ( @Since("1.6.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 96e6f1c512..6299f74a6b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.broadcast.Broadcast import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vectors, VectorUDT} @@ -116,10 +116,8 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit } /** - * :: Experimental :: * Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]]. */ -@Experimental @Since("1.5.0") class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable { @@ -201,11 +199,9 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] { } /** - * :: Experimental :: * Converts a text document to a sparse vector of token counts. * @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted. */ -@Experimental @Since("1.5.0") class CountVectorizerModel( @Since("1.5.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 9605145e12..6ff36b35ca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import edu.emory.mathcs.jtransforms.dct._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param.BooleanParam @@ -27,7 +27,6 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero * padding is performed on the input vector. * It returns a real vector of the same length representing the DCT. The return vector is scaled @@ -35,7 +34,6 @@ import org.apache.spark.sql.types.DataType * * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. */ -@Experimental @Since("1.5.0") class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index d07833e580..f860b3a787 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.Param @@ -27,12 +27,10 @@ import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a * provided "weight" vector. In other words, it scales each column of the dataset by a scalar * multiplier. */ -@Experimental @Since("1.4.0") class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 6ca7336cd0..a8792a35ff 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.param._ @@ -29,7 +29,6 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{ArrayType, StructType} /** - * :: Experimental :: * Maps a sequence of terms to their term frequencies using the hashing trick. * Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32) * to calculate the hash code value for the term object. @@ -37,7 +36,6 @@ import org.apache.spark.sql.types.{ArrayType, StructType} * it is advisable to use a power of two as the numFeatures parameter; * otherwise the features will not be mapped evenly to the columns. */ -@Experimental @Since("1.2.0") class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 5d6287f0e3..6386dd8a10 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param._ @@ -61,10 +61,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol } /** - * :: Experimental :: * Compute the Inverse Document Frequency (IDF) given a collection of documents. */ -@Experimental @Since("1.4.0") final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[IDFModel] with IDFBase with DefaultParamsWritable { @@ -111,10 +109,8 @@ object IDF extends DefaultParamsReadable[IDF] { } /** - * :: Experimental :: * Model fitted by [[IDF]]. */ -@Experimental @Since("1.4.0") class IDFModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala index dca28b5c5d..7b11f86279 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.attribute._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -32,7 +32,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: Experimental :: * Implements the feature interaction transform. This transformer takes in Double and Vector type * columns and outputs a flattened vector of their feature interactions. To handle interaction, * we first one-hot encode any nominal features. Then, a vector of the feature cross-products is @@ -42,7 +41,6 @@ import org.apache.spark.sql.types._ * `Vector(6, 8)` if all input features were numeric. If the first feature was instead nominal * with four categories, the output would then be `Vector(0, 0, 0, 0, 3, 4, 0, 0)`. */ -@Experimental @Since("1.6.0") class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala index f7f1d42039..6cefa7086c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala @@ -23,6 +23,8 @@ import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.linalg.Vector /** + * :: Experimental :: + * * Class that represents the features and labels of a data point. * * @param label Label for this data point. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index d5ad5abced..7b03f0c0f3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param.{DoubleParam, ParamMap, Params} @@ -74,7 +74,6 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H } /** - * :: Experimental :: * Rescale each feature individually to a common range [min, max] linearly using column summary * statistics, which is also known as min-max normalization or Rescaling. The rescaled value for * feature E is calculated as, @@ -85,7 +84,6 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H * Note that since zero values will probably be transformed to non-zero values, output of the * transformer will be DenseVector even for sparse input. */ -@Experimental @Since("1.5.0") class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[MinMaxScalerModel] with MinMaxScalerParams with DefaultParamsWritable { @@ -138,7 +136,6 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] { } /** - * :: Experimental :: * Model fitted by [[MinMaxScaler]]. * * @param originalMin min value for each original column during fitting @@ -146,7 +143,6 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] { * * TODO: The transformer does not yet set the metadata in the output column (SPARK-8529). */ -@Experimental @Since("1.5.0") class MinMaxScalerModel private[ml] ( @Since("1.5.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index 9c1f1ad443..4463aea009 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -17,14 +17,13 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} /** - * :: Experimental :: * A feature transformer that converts the input array of strings into an array of n-grams. Null * values in the input array are ignored. * It returns an array of n-grams where each n-gram is represented by a space-separated string of @@ -34,7 +33,6 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * When the input array length is less than n (number of elements per n-gram), no n-grams are * returned. */ -@Experimental @Since("1.5.0") class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Seq[String], Seq[String], NGram] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index f9cbad90c9..eb06900580 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.{DoubleParam, ParamValidators} @@ -27,10 +27,8 @@ import org.apache.spark.mllib.linalg.{Vectors => OldVectors} import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * Normalize a vector to have unit norm using the given p-norm. */ -@Experimental @Since("1.4.0") class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 01828ede6b..8b04b5de6f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.Vectors @@ -29,7 +29,6 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} /** - * :: Experimental :: * A one-hot encoder that maps a column of category indices to a column of binary vectors, with * at most a single one-value per row that indicates the input category index. * For example with 5 categories, an input value of 2.0 would map to an output vector of @@ -42,7 +41,6 @@ import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} * * @see [[StringIndexer]] for converting categorical values into category indices */ -@Experimental @Since("1.4.0") class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index ef8b08545d..6b913480fd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param._ @@ -59,12 +59,11 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC } } + /** - * :: Experimental :: * PCA trains a model to project vectors to a lower dimensional space of the top [[PCA!.k]] * principal components. */ -@Experimental @Since("1.5.0") class PCA @Since("1.5.0") ( @Since("1.5.0") override val uid: String) @@ -116,14 +115,12 @@ object PCA extends DefaultParamsReadable[PCA] { } /** - * :: Experimental :: * Model fitted by [[PCA]]. Transforms vectors to a lower dimensional space. * * @param pc A principal components Matrix. Each column is one principal component. * @param explainedVariance A vector of proportions of variance explained by * each principal component. */ -@Experimental @Since("1.5.0") class PCAModel private[ml] ( @Since("1.5.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 7b35fdeaf4..72fb35bd79 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators} @@ -27,14 +27,12 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.DataType /** - * :: Experimental :: * Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion, * which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an * expansion of a product of sums expresses it as a sum of products by using the fact that * multiplication distributes over addition". Take a 2-variable feature vector as an example: * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. */ -@Experimental @Since("1.4.0") class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index 96b8e7d9f7..9a636bd8a5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml._ import org.apache.spark.ml.attribute.NominalAttribute @@ -64,7 +64,6 @@ private[feature] trait QuantileDiscretizerBase extends Params } /** - * :: Experimental :: * `QuantileDiscretizer` takes a column with continuous features and outputs a column with binned * categorical features. The number of bins can be set using the `numBuckets` parameter. * The bin ranges are chosen using an approximate algorithm (see the documentation for @@ -73,7 +72,6 @@ private[feature] trait QuantileDiscretizerBase extends Params * `relativeError` parameter. The lower and upper bin bounds will be `-Infinity` and `+Infinity`, * covering all real values. */ -@Experimental @Since("1.6.0") final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[Bucketizer] with QuantileDiscretizerBase with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index b8715746fe..289037640f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.Transformer import org.apache.spark.ml.util._ @@ -25,7 +25,6 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.types.StructType /** - * :: Experimental :: * Implements the transformations which are defined by SQL statement. * Currently we only support SQL syntax like 'SELECT ... FROM __THIS__ ...' * where '__THIS__' represents the underlying table of the input dataset. @@ -37,7 +36,6 @@ import org.apache.spark.sql.types.StructType * - SELECT a, SQRT(b) AS b_sqrt FROM __THIS__ where a > 5 * - SELECT a, b, SUM(c) AS c_sum FROM __THIS__ GROUP BY a, b */ -@Experimental @Since("1.6.0") class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index b4be95494f..2494cf51a2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml._ import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param._ @@ -76,7 +76,6 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with } /** - * :: Experimental :: * Standardizes features by removing the mean and scaling to unit variance using column summary * statistics on the samples in the training set. * @@ -85,7 +84,6 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with * corrected sample standard deviation]], * which is computed as the square root of the unbiased sample variance. */ -@Experimental @Since("1.2.0") class StandardScaler @Since("1.4.0") ( @Since("1.4.0") override val uid: String) @@ -138,13 +136,11 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] { } /** - * :: Experimental :: * Model fitted by [[StandardScaler]]. * * @param std Standard deviation of the StandardScalerModel * @param mean Mean of the StandardScalerModel */ -@Experimental @Since("1.2.0") class StandardScalerModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 1a6f42f773..666070037c 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{BooleanParam, ParamMap, StringArrayParam} import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} @@ -27,12 +27,10 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{ArrayType, StringType, StructType} /** - * :: Experimental :: * A feature transformer that filters out stop words from input. * Note: null values from input array are preserved unless adding null to stopWords explicitly. * @see [[http://en.wikipedia.org/wiki/Stop_words]] */ -@Experimental @Since("1.5.0") class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 028e540fe5..fe79e2ec80 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model, Transformer} import org.apache.spark.ml.attribute.{Attribute, NominalAttribute} import org.apache.spark.ml.param._ @@ -55,7 +55,6 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha } /** - * :: Experimental :: * A label indexer that maps a string column of labels to an ML column of label indices. * If the input column is numeric, we cast it to string and index the string values. * The indices are in [0, numLabels), ordered by label frequencies. @@ -63,7 +62,6 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha * * @see [[IndexToString]] for the inverse transformation */ -@Experimental @Since("1.4.0") class StringIndexer @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel] @@ -112,7 +110,6 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { } /** - * :: Experimental :: * Model fitted by [[StringIndexer]]. * * NOTE: During transformation, if the input column does not exist, @@ -121,7 +118,6 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { * * @param labels Ordered list of labels, corresponding to indices to be assigned. */ -@Experimental @Since("1.4.0") class StringIndexerModel ( @Since("1.4.0") override val uid: String, @@ -250,7 +246,6 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { } /** - * :: Experimental :: * A [[Transformer]] that maps a column of indices back to a new column of corresponding * string values. * The index-string mapping is either from the ML attributes of the input column, @@ -258,7 +253,6 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { * * @see [[StringIndexer]] for converting strings into indices */ -@Experimental @Since("1.5.0") class IndexToString private[ml] (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 010c948749..45d8fa94a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -17,19 +17,17 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} /** - * :: Experimental :: * A tokenizer that converts the input string to lowercase and then splits it by white spaces. * * @see [[RegexTokenizer]] */ -@Experimental @Since("1.2.0") class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable { @@ -59,13 +57,11 @@ object Tokenizer extends DefaultParamsReadable[Tokenizer] { } /** - * :: Experimental :: * A regex based tokenizer that extracts tokens either by using the provided regex pattern to split * the text (default) or repeatedly matching the regex (if `gaps` is false). * Optional parameters also allow filtering tokens using a minimal length. * It returns an array of strings that can be empty. */ -@Experimental @Since("1.4.0") class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], RegexTokenizer] with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 4939dabd98..142a2ae44c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute} import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} @@ -32,10 +32,8 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: Experimental :: * A feature transformer that merges multiple columns into a vector column. */ -@Experimental @Since("1.4.0") class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 5656a9f979..d1a5c2e825 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, VectorUDT} @@ -59,7 +59,6 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu } /** - * :: Experimental :: * Class for indexing categorical feature columns in a dataset of [[Vector]]. * * This has 2 usage modes: @@ -93,7 +92,6 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu * - Add warning if a categorical feature has only 1 category. * - Add option for allowing unknown categories. */ -@Experimental @Since("1.4.0") class VectorIndexer @Since("1.4.0") ( @Since("1.4.0") override val uid: String) @@ -247,7 +245,6 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] { } /** - * :: Experimental :: * Model fitted by [[VectorIndexer]]. Transform categorical features to use 0-based indices * instead of their original values. * - Categorical features are mapped to indices. @@ -263,7 +260,6 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] { * Values are maps from original features values to 0-based category indices. * If a feature is not in this map, it is treated as continuous. */ -@Experimental @Since("1.4.0") class VectorIndexerModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index 6769e490c5..966ccb85d0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} import org.apache.spark.ml.linalg._ @@ -29,7 +29,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.StructType /** - * :: Experimental :: * This class takes a feature vector and outputs a new feature vector with a subarray of the * original features. * @@ -40,7 +39,6 @@ import org.apache.spark.sql.types.StructType * The output vector will order features with the selected indices first (in the order given), * followed by the selected names (in the order given). */ -@Experimental @Since("1.5.0") final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 0cac3fa2d7..c2b434c3d5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -19,8 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors, VectorUDT} import org.apache.spark.ml.param._ @@ -115,11 +114,9 @@ private[feature] trait Word2VecBase extends Params } /** - * :: Experimental :: * Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further * natural language processing or machine learning process. */ -@Experimental @Since("1.4.0") final class Word2Vec @Since("1.4.0") ( @Since("1.4.0") override val uid: String) @@ -202,10 +199,8 @@ object Word2Vec extends DefaultParamsReadable[Word2Vec] { } /** - * :: Experimental :: * Model fitted by [[Word2Vec]]. */ -@Experimental @Since("1.4.0") class Word2VecModel private[ml] ( @Since("1.4.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index ecec61a72f..e7780cf1c3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -28,9 +28,9 @@ import scala.collection.JavaConverters._ import org.json4s._ import org.json4s.jackson.JsonMethods._ -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} -import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.ml.linalg.JsonVectorConverter +import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.Identifiable /** @@ -510,11 +510,9 @@ class IntArrayParam(parent: Params, name: String, doc: String, isValid: Array[In } /** - * :: Experimental :: * A param and its value. */ @Since("1.2.0") -@Experimental case class ParamPair[T] @Since("1.2.0") ( @Since("1.2.0") param: Param[T], @Since("1.2.0") value: T) { @@ -797,11 +795,9 @@ trait Params extends Identifiable with Serializable { abstract class JavaParams extends Params /** - * :: Experimental :: * A param to value map. */ @Since("1.2.0") -@Experimental final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) extends Serializable { @@ -952,7 +948,6 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) } @Since("1.2.0") -@Experimental object ParamMap { /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 5dc2433e55..a2c4c26911 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -26,12 +26,12 @@ import scala.util.{Sorting, Try} import scala.util.hashing.byteswap64 import com.github.fommil.netlib.BLAS.{getInstance => blas} -import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext} -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ @@ -222,14 +222,12 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w } /** - * :: Experimental :: * Model fitted by ALS. * * @param rank rank of the matrix factorization model * @param userFactors a DataFrame that stores user factors in two columns: `id` and `features` * @param itemFactors a DataFrame that stores item factors in two columns: `id` and `features` */ -@Experimental @Since("1.3.0") class ALSModel private[ml] ( @Since("1.4.0") override val uid: String, @@ -333,7 +331,6 @@ object ALSModel extends MLReadable[ALSModel] { } /** - * :: Experimental :: * Alternating Least Squares (ALS) matrix factorization. * * ALS attempts to estimate the ratings matrix `R` as the product of two lower-rank matrices, @@ -362,7 +359,6 @@ object ALSModel extends MLReadable[ALSModel] { * indicated user * preferences rather than explicit ratings given to items. */ -@Experimental @Since("1.3.0") class ALS(@Since("1.4.0") override val uid: String) extends Estimator[ALSModel] with ALSParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 7ff6d0afd5..ebc6c12ddc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.Path import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.Vector @@ -38,13 +38,11 @@ import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm * for regression. * It supports both continuous and categorical features. */ @Since("1.4.0") -@Experimental class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel] with DecisionTreeRegressorParams with DefaultParamsWritable { @@ -125,7 +123,6 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S } @Since("1.4.0") -@Experimental object DecisionTreeRegressor extends DefaultParamsReadable[DecisionTreeRegressor] { /** Accessor for supported impurities: variance */ final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities @@ -135,13 +132,11 @@ object DecisionTreeRegressor extends DefaultParamsReadable[DecisionTreeRegressor } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for regression. * It supports both continuous and categorical features. * @param rootNode Root of the decision tree */ @Since("1.4.0") -@Experimental class DecisionTreeRegressionModel private[ml] ( override val uid: String, override val rootNode: Node, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 6223555504..ce355938ec 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -38,7 +38,6 @@ import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * learning algorithm for regression. * It supports both continuous and categorical features. @@ -56,7 +55,6 @@ import org.apache.spark.sql.functions._ * [https://issues.apache.org/jira/browse/SPARK-4240] */ @Since("1.4.0") -@Experimental class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Predictor[Vector, GBTRegressor, GBTRegressionModel] with GBTRegressorParams with DefaultParamsWritable with Logging { @@ -135,7 +133,6 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) } @Since("1.4.0") -@Experimental object GBTRegressor extends DefaultParamsReadable[GBTRegressor] { /** Accessor for supported loss settings: squared (L2), absolute (L1) */ @@ -147,8 +144,6 @@ object GBTRegressor extends DefaultParamsReadable[GBTRegressor] { } /** - * :: Experimental :: - * * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * model for regression. * It supports both continuous and categorical features. @@ -156,7 +151,6 @@ object GBTRegressor extends DefaultParamsReadable[GBTRegressor] { * @param _treeWeights Weights for the decision trees in the ensemble. */ @Since("1.4.0") -@Experimental class GBTRegressionModel private[ml]( override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 9b9429a328..35396446ed 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.regression import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} @@ -120,7 +120,6 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures } /** - * :: Experimental :: * Isotonic regression. * * Currently implemented using parallelized pool adjacent violators algorithm. @@ -129,7 +128,6 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures * Uses [[org.apache.spark.mllib.regression.IsotonicRegression]]. */ @Since("1.5.0") -@Experimental class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase with DefaultParamsWritable { @@ -192,7 +190,6 @@ object IsotonicRegression extends DefaultParamsReadable[IsotonicRegression] { } /** - * :: Experimental :: * Model fitted by IsotonicRegression. * Predicts using a piecewise linear function. * @@ -202,7 +199,6 @@ object IsotonicRegression extends DefaultParamsReadable[IsotonicRegression] { * model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]]. */ @Since("1.5.0") -@Experimental class IsotonicRegressionModel private[ml] ( override val uid: String, private val oldModel: MLlibIsotonicRegressionModel) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 6b82ae14e1..c57e9eb0bf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -54,7 +54,6 @@ private[regression] trait LinearRegressionParams extends PredictorParams with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver /** - * :: Experimental :: * Linear regression. * * The learning objective is to minimize the squared error, with regularization. @@ -68,7 +67,6 @@ private[regression] trait LinearRegressionParams extends PredictorParams * - L2 + L1 (elastic net) */ @Since("1.3.0") -@Experimental class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String) extends Regressor[Vector, LinearRegression, LinearRegressionModel] with LinearRegressionParams with DefaultParamsWritable with Logging { @@ -387,11 +385,9 @@ object LinearRegression extends DefaultParamsReadable[LinearRegression] { } /** - * :: Experimental :: * Model produced by [[LinearRegression]]. */ @Since("1.3.0") -@Experimental class LinearRegressionModel private[ml] ( @Since("1.4.0") override val uid: String, @Since("2.0.0") val coefficients: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index 4f4d3d2784..0ad00aa6f9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.regression import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.Vector @@ -37,12 +37,10 @@ import org.apache.spark.sql.functions._ /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression. * It supports both continuous and categorical features. */ @Since("1.4.0") -@Experimental class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel] with RandomForestRegressorParams with DefaultParamsWritable { @@ -118,7 +116,6 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S } @Since("1.4.0") -@Experimental object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor]{ /** Accessor for supported impurity settings: variance */ @Since("1.4.0") @@ -135,7 +132,6 @@ object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor } /** - * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for regression. * It supports both continuous and categorical features. * @@ -143,7 +139,6 @@ object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor * @param numFeatures Number of features used by this model */ @Since("1.4.0") -@Experimental class RandomForestRegressionModel private[ml] ( override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala index d5e5c45460..8144bcb7d4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala @@ -17,17 +17,14 @@ package org.apache.spark.ml.tree -import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.linalg.Vector import org.apache.spark.mllib.tree.impurity.ImpurityCalculator import org.apache.spark.mllib.tree.model.{ImpurityStats, InformationGainStats => OldInformationGainStats, Node => OldNode, Predict => OldPredict} /** - * :: DeveloperApi :: * Decision tree node interface. */ -@DeveloperApi sealed abstract class Node extends Serializable { // TODO: Add aggregate stats (once available). This will happen after we move the DecisionTree @@ -109,12 +106,10 @@ private[ml] object Node { } /** - * :: DeveloperApi :: * Decision tree leaf node. * @param prediction Prediction this node makes * @param impurity Impurity measure at this node (for training data) */ -@DeveloperApi class LeafNode private[ml] ( override val prediction: Double, override val impurity: Double, @@ -147,7 +142,6 @@ class LeafNode private[ml] ( } /** - * :: DeveloperApi :: * Internal Decision Tree node. * @param prediction Prediction this node would make if it were a leaf node * @param impurity Impurity measure at this node (for training data) @@ -157,7 +151,6 @@ class LeafNode private[ml] ( * @param rightChild Right-hand child node * @param split Information about the test used to split to the left or right child. */ -@DeveloperApi class InternalNode private[ml] ( override val prediction: Double, override val impurity: Double, @@ -167,6 +160,9 @@ class InternalNode private[ml] ( val split: Split, override private[ml] val impurityStats: ImpurityCalculator) extends Node { + // Note to developers: The constructor argument impurityStats should be reconsidered before we + // make the constructor public. We may be able to improve the representation. + override def toString: String = { s"InternalNode(prediction = $prediction, impurity = $impurity, split = $split)" } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala index 9704e15cd8..47fe3524f2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala @@ -19,18 +19,16 @@ package org.apache.spark.ml.tree import java.util.Objects -import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.Vector import org.apache.spark.mllib.tree.configuration.{FeatureType => OldFeatureType} import org.apache.spark.mllib.tree.model.{Split => OldSplit} /** - * :: DeveloperApi :: * Interface for a "Split," which specifies a test made at a decision tree node * to choose the left or right path. */ -@DeveloperApi sealed trait Split extends Serializable { /** Index of feature which this split tests */ @@ -67,14 +65,12 @@ private[tree] object Split { } /** - * :: DeveloperApi :: * Split which tests a categorical feature. * @param featureIndex Index of the feature to test * @param _leftCategories If the feature value is in this set of categories, then the split goes * left. Otherwise, it goes right. * @param numCategories Number of categories for this feature. */ -@DeveloperApi class CategoricalSplit private[ml] ( override val featureIndex: Int, _leftCategories: Array[Double], @@ -153,13 +149,11 @@ class CategoricalSplit private[ml] ( } /** - * :: DeveloperApi :: * Split which tests a continuous feature. * @param featureIndex Index of the feature to test * @param threshold If the feature value is <= this threshold, then the split goes left. * Otherwise, it goes right. */ -@DeveloperApi class ContinuousSplit private[ml] (override val featureIndex: Int, val threshold: Double) extends Split { diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index 7d42da4a2f..520557849b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -25,7 +25,7 @@ import com.github.fommil.netlib.F2jBLAS import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml._ import org.apache.spark.ml.evaluation.Evaluator @@ -55,11 +55,9 @@ private[ml] trait CrossValidatorParams extends ValidatorParams { } /** - * :: Experimental :: * K-fold cross validation. */ @Since("1.2.0") -@Experimental class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Estimator[CrossValidatorModel] with CrossValidatorParams with MLWritable with Logging { @@ -190,7 +188,6 @@ object CrossValidator extends MLReadable[CrossValidator] { } /** - * :: Experimental :: * Model from k-fold cross validation. * * @param bestModel The best model selected from k-fold cross validation. @@ -198,7 +195,6 @@ object CrossValidator extends MLReadable[CrossValidator] { * [[CrossValidator.estimatorParamMaps]], in the corresponding order. */ @Since("1.2.0") -@Experimental class CrossValidatorModel private[ml] ( @Since("1.4.0") override val uid: String, @Since("1.2.0") val bestModel: Model[_], diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala index 7d12f447f7..d369e7a61c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala @@ -20,15 +20,13 @@ package org.apache.spark.ml.tuning import scala.annotation.varargs import scala.collection.mutable -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param._ /** - * :: Experimental :: * Builder for a param grid used in grid search-based model selection. */ @Since("1.2.0") -@Experimental class ParamGridBuilder @Since("1.2.0") { private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]] diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala index f6f2bad401..0fdba1cb88 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala @@ -25,7 +25,7 @@ import scala.language.existentials import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.evaluation.Evaluator @@ -54,14 +54,12 @@ private[ml] trait TrainValidationSplitParams extends ValidatorParams { } /** - * :: Experimental :: * Validation for hyper-parameter tuning. * Randomly splits the input dataset into train and validation sets, * and uses evaluation metric on the validation set to select the best model. * Similar to [[CrossValidator]], but only splits the set once. */ @Since("1.5.0") -@Experimental class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[TrainValidationSplitModel] with TrainValidationSplitParams with MLWritable with Logging { @@ -188,7 +186,6 @@ object TrainValidationSplit extends MLReadable[TrainValidationSplit] { } /** - * :: Experimental :: * Model from train validation split. * * @param uid Id. @@ -196,7 +193,6 @@ object TrainValidationSplit extends MLReadable[TrainValidationSplit] { * @param validationMetrics Evaluated validation metrics. */ @Since("1.5.0") -@Experimental class TrainValidationSplitModel private[ml] ( @Since("1.5.0") override val uid: String, @Since("1.5.0") val bestModel: Model[_], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala index 91edcf2a79..f1664ce4ab 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala @@ -22,7 +22,7 @@ import java.util.Random import scala.annotation.tailrec import scala.collection.mutable -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors} @@ -31,8 +31,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel /** - * :: Experimental :: - * * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" * by Steinbach, Karypis, and Kumar, with modification to fit Spark. * The algorithm starts from a single cluster that contains all points. @@ -54,7 +52,6 @@ import org.apache.spark.storage.StorageLevel * KDD Workshop on Text Mining, 2000.]] */ @Since("1.6.0") -@Experimental class BisectingKMeans private ( private var k: Int, private var maxIterations: Int, @@ -398,8 +395,6 @@ private object BisectingKMeans extends Serializable { } /** - * :: Experimental :: - * * Represents a node in a clustering tree. * * @param index node index, negative for internal nodes and non-negative for leaf nodes @@ -411,7 +406,6 @@ private object BisectingKMeans extends Serializable { * @param children children nodes */ @Since("1.6.0") -@Experimental private[clustering] class ClusteringTreeNode private[clustering] ( val index: Int, val size: Long, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala index 11fd940b8b..8438015cce 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala @@ -23,7 +23,7 @@ import org.json4s.jackson.JsonMethods._ import org.json4s.JsonDSL._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.Vector @@ -32,8 +32,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} /** - * :: Experimental :: - * * Clustering model produced by [[BisectingKMeans]]. * The prediction is done level-by-level from the root node to a leaf node, and at each node among * its children the closest to the input point is selected. @@ -41,7 +39,6 @@ import org.apache.spark.sql.{Row, SparkSession} * @param root the root node of the clustering tree */ @Since("1.6.0") -@Experimental class BisectingKMeansModel private[clustering] ( private[clustering] val root: ClusteringTreeNode ) extends Serializable with Saveable with Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index d295826300..9ebba1de0d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -25,7 +25,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.{JavaPairRDD, JavaRDD} import org.apache.spark.graphx.{Edge, EdgeContext, Graph, VertexId} import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors} @@ -426,13 +426,10 @@ class LocalLDAModel private[spark] ( } /** - * :: Experimental :: - * * Local (non-distributed) model fitted by [[LDA]]. * * This model stores the inferred topics only; it does not store info about the training dataset. */ -@Experimental @Since("1.5.0") object LocalLDAModel extends Loader[LocalLDAModel] { @@ -822,15 +819,12 @@ class DistributedLDAModel private[clustering] ( } /** - * :: Experimental :: - * * Distributed model fitted by [[LDA]]. * This type of model is currently only produced by Expectation-Maximization (EM). * * This model stores the inferred topics, the full training dataset, and the topic distribution * for each training document. */ -@Experimental @Since("1.5.0") object DistributedLDAModel extends Loader[DistributedLDAModel] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 9a63cc29da..3c26d26708 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.fpm import scala.collection.JavaConverters._ import scala.reflect.ClassTag -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.internal.Logging @@ -28,14 +28,11 @@ import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset import org.apache.spark.rdd.RDD /** - * :: Experimental :: - * * Generates association rules from a [[RDD[FreqItemset[Item]]]. This method only generates * association rules which have a single item as the consequent. * */ @Since("1.5.0") -@Experimental class AssociationRules private[fpm] ( private var minConfidence: Double) extends Logging with Serializable { @@ -95,8 +92,6 @@ class AssociationRules private[fpm] ( object AssociationRules { /** - * :: Experimental :: - * * An association rule between sets of items. * @param antecedent hypotheses of the rule. Java users should call [[Rule#javaAntecedent]] * instead. @@ -106,7 +101,6 @@ object AssociationRules { * */ @Since("1.5.0") - @Experimental class Rule[Item] private[fpm] ( @Since("1.5.0") val antecedent: Array[Item], @Since("1.5.0") val consequent: Array[Item], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index c13c794775..7382000791 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -30,7 +30,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods.{compact, render} import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.internal.Logging @@ -42,8 +42,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel /** - * :: Experimental :: - * * A parallel PrefixSpan algorithm to mine frequent sequential patterns. * The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns * Efficiently by Prefix-Projected Pattern Growth ([[http://doi.org/10.1109/ICDE.2001.914830]]). @@ -60,7 +58,6 @@ import org.apache.spark.storage.StorageLevel * @see [[https://en.wikipedia.org/wiki/Sequential_Pattern_Mining Sequential Pattern Mining * (Wikipedia)]] */ -@Experimental @Since("1.5.0") class PrefixSpan private ( private var minSupport: Double, @@ -230,7 +227,6 @@ class PrefixSpan private ( } -@Experimental @Since("1.5.0") object PrefixSpan extends Logging { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala index 4591cb88ef..8024b1c003 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.linalg -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since /** * Represents singular value decomposition (SVD) factors. @@ -26,10 +26,8 @@ import org.apache.spark.annotation.{Experimental, Since} case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType) /** - * :: Experimental :: * Represents QR factors. */ @Since("1.5.0") -@Experimental case class QRDecomposition[QType, RType](Q: QType, R: RType) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 480a64548c..f372355005 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuffer import breeze.linalg.{norm, DenseVector => BDV} -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD @@ -53,11 +53,9 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va } /** - * :: Experimental :: * Set fraction of data to be used for each SGD iteration. * Default 1.0 (corresponding to deterministic/classical gradient descent) */ - @Experimental def setMiniBatchFraction(fraction: Double): this.type = { require(fraction > 0 && fraction <= 1.0, s"Fraction for mini-batch SGD must be in range (0, 1] but got ${fraction}") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala index 274ac7c995..5d61796f1d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala @@ -23,7 +23,7 @@ import javax.xml.transform.stream.StreamResult import org.jpmml.model.JAXBUtil import org.apache.spark.SparkContext -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory /** @@ -45,20 +45,16 @@ trait PMMLExportable { } /** - * :: Experimental :: * Export the model to a local file in PMML format */ - @Experimental @Since("1.4.0") def toPMML(localPath: String): Unit = { toPMML(new StreamResult(new File(localPath))) } /** - * :: Experimental :: * Export the model to a directory on a distributed file system in PMML format */ - @Experimental @Since("1.4.0") def toPMML(sc: SparkContext, path: String): Unit = { val pmml = toPMML() @@ -66,20 +62,16 @@ trait PMMLExportable { } /** - * :: Experimental :: * Export the model to the OutputStream in PMML format */ - @Experimental @Since("1.4.0") def toPMML(outputStream: OutputStream): Unit = { toPMML(new StreamResult(outputStream)) } /** - * :: Experimental :: * Export the model to a String in PMML format */ - @Experimental @Since("1.4.0") def toPMML(): String = { val writer = new StringWriter diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala index 4c382d7c2b..97c032de7a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.stat.test import scala.beans.BeanInfo -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.streaming.api.java.JavaDStream import org.apache.spark.streaming.dstream.DStream @@ -42,7 +42,6 @@ case class BinarySample @Since("1.6.0") ( } /** - * :: Experimental :: * Performs online 2-sample significance testing for a stream of (Boolean, Double) pairs. The * Boolean identifies which sample each observation comes from, and the Double is the numeric value * of the observation. @@ -67,7 +66,6 @@ case class BinarySample @Since("1.6.0") ( * .registerStream(DStream) * }}} */ -@Experimental @Since("1.6.0") class StreamingTest @Since("1.6.0") () extends Logging with Serializable { private var peacePeriod: Int = 0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala index 8a29fd39a9..5cfc05a3dd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.stat.test -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since /** * Trait for hypothesis test results. @@ -94,10 +94,8 @@ class ChiSqTestResult private[stat] (override val pValue: Double, } /** - * :: Experimental :: * Object containing the test results for the Kolmogorov-Smirnov test. */ -@Experimental @Since("1.5.0") class KolmogorovSmirnovTestResult private[stat] ( @Since("1.5.0") override val pValue: Double, @@ -113,10 +111,8 @@ class KolmogorovSmirnovTestResult private[stat] ( } /** - * :: Experimental :: * Object containing the test results for streaming testing. */ -@Experimental @Since("1.6.0") private[stat] class StreamingTestResult @Since("1.6.0") ( @Since("1.6.0") override val pValue: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index 853c7319ec..2436ce4086 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.configuration -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since /** - * :: Experimental :: * Enum to select the algorithm for the decision tree */ @Since("1.0.0") -@Experimental object Algo extends Enumeration { @Since("1.0.0") type Algo = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 3a731f45d6..d4448da9ee 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Class for calculating entropy during multiclass classification. */ @Since("1.0.0") -@Experimental object Entropy extends Impurity { private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index 7730c0a8c1..22e70278a6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -17,16 +17,14 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during multiclass classification. */ @Since("1.0.0") -@Experimental object Gini extends Impurity { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 65f0163ec6..a5bdc2c6d2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -17,17 +17,15 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Trait for calculating information gain. * This trait is used for * (a) setting the impurity parameter in [[org.apache.spark.mllib.tree.configuration.Strategy]] * (b) calculating impurity values from sufficient statistics. */ @Since("1.0.0") -@Experimental trait Impurity extends Serializable { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index 2423516123..c9bf0db4de 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -17,14 +17,12 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} /** - * :: Experimental :: * Class for calculating variance during regression */ @Since("1.0.0") -@Experimental object Variance extends Impurity { /** diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index c035942f73..3c4af90aca 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -49,8 +49,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Logistic regression. Currently, this class only supports binary classification. @@ -216,8 +214,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by LogisticRegression. .. versionadded:: 1.3.0 @@ -277,6 +273,8 @@ class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): class LogisticRegressionSummary(JavaWrapper): """ + .. note:: Experimental + Abstraction for Logistic Regression Results for a given model. .. versionadded:: 2.0.0 @@ -321,6 +319,8 @@ class LogisticRegressionSummary(JavaWrapper): @inherit_doc class LogisticRegressionTrainingSummary(LogisticRegressionSummary): """ + .. note:: Experimental + Abstraction for multinomial Logistic Regression Training results. Currently, the training summary ignores the training weights except for the objective trace. @@ -501,8 +501,6 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Decision tree `_ learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical @@ -599,8 +597,6 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred @inherit_doc class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by DecisionTreeClassifier. .. versionadded:: 1.4.0 @@ -634,8 +630,6 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred RandomForestParams, TreeClassifierParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Random Forest `_ learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical @@ -730,8 +724,6 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by RandomForestClassifier. .. versionadded:: 1.4.0 @@ -764,8 +756,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Gradient-Boosted Trees (GBTs) `_ learning algorithm for classification. It supports binary labels, as well as both continuous and categorical features. @@ -885,8 +875,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by GBTClassifier. .. versionadded:: 1.4.0 @@ -918,8 +906,6 @@ class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable) class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, HasThresholds, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Naive Bayes Classifiers. It supports both Multinomial and Bernoulli NB. `Multinomial NB `_ @@ -1043,8 +1029,6 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by NaiveBayes. .. versionadded:: 1.5.0 diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index bbbb94f9a0..2881380152 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -60,8 +60,6 @@ __all__ = ['Binarizer', @inherit_doc class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Binarize a column of continuous features given a threshold. >>> df = spark.createDataFrame([(0.5,)], ["values"]) @@ -125,8 +123,6 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java @inherit_doc class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Maps a column of continuous features to a column of feature buckets. >>> df = spark.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"]) @@ -200,8 +196,6 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav @inherit_doc class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Extracts a vocabulary from document collections and generates a :py:attr:`CountVectorizerModel`. >>> df = spark.createDataFrame( @@ -348,8 +342,6 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`CountVectorizer`. .. versionadded:: 1.6.0 @@ -367,8 +359,6 @@ class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable): @inherit_doc class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero padding is performed on the input vector. It returns a real vector of the same length representing the DCT. @@ -439,8 +429,6 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. @@ -505,8 +493,6 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Maps a sequence of terms to their term frequencies using the hashing trick. Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32) to calculate the hash code value for the term object. @@ -576,8 +562,6 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, Java @inherit_doc class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Compute the Inverse Document Frequency (IDF) given a collection of documents. >>> from pyspark.ml.linalg import DenseVector @@ -653,8 +637,6 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`IDF`. .. versionadded:: 1.4.0 @@ -752,8 +734,6 @@ class MaxAbsScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): @inherit_doc class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Rescale each feature individually to a common range [min, max] linearly using column summary statistics, which is also known as min-max normalization or Rescaling. The rescaled value for feature E is calculated as, @@ -859,8 +839,6 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`MinMaxScaler`. .. versionadded:: 1.6.0 @@ -887,8 +865,6 @@ class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): @ignore_unicode_prefix class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that converts the input array of strings into an array of n-grams. Null values in the input array are ignored. It returns an array of n-grams where each n-gram is represented by a space-separated string of @@ -965,8 +941,6 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr @inherit_doc class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Normalize a vector to have unit norm using the given p-norm. >>> from pyspark.ml.linalg import Vectors @@ -1031,8 +1005,6 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav @inherit_doc class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A one-hot encoder that maps a column of category indices to a column of binary vectors, with at most a single one-value per row that indicates the input category index. @@ -1114,8 +1086,6 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Perform feature expansion in a polynomial space. As said in `wikipedia of Polynomial Expansion `_, "In mathematics, an expansion of a product of sums expresses it as a sum of products by using the fact that @@ -1287,8 +1257,6 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadab @ignore_unicode_prefix class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A regex based tokenizer that extracts tokens either by using the provided regex pattern (in Java dialect) to split the text (default) or repeatedly matching the regex (if gaps is false). @@ -1418,8 +1386,6 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, @inherit_doc class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Implements the transforms which are defined by SQL statement. Currently we only support SQL syntax like 'SELECT ... FROM __THIS__' where '__THIS__' represents the underlying table of the input dataset. @@ -1479,8 +1445,6 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable): @inherit_doc class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Standardizes features by removing the mean and scaling to unit variance using column summary statistics on the samples in the training set. @@ -1576,8 +1540,6 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`StandardScaler`. .. versionadded:: 1.4.0 @@ -1604,8 +1566,6 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A label indexer that maps a string column of labels to an ML column of label indices. If the input column is numeric, we cast it to string and index the string values. The indices are in [0, numLabels), ordered by label frequencies. @@ -1668,8 +1628,6 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`StringIndexer`. .. versionadded:: 1.4.0 @@ -1687,8 +1645,6 @@ class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): @inherit_doc class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A :py:class:`Transformer` that maps a column of indices back to a new column of corresponding string values. The index-string mapping is either from the ML attributes of the input column, @@ -1741,8 +1697,6 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that filters out stop words from input. Note: null values from input array are preserved unless adding null to stopWords explicitly. @@ -1833,8 +1787,6 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadabl @ignore_unicode_prefix class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A tokenizer that converts the input string to lowercase and then splits it by white spaces. @@ -1888,8 +1840,6 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java @inherit_doc class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that merges multiple columns into a vector column. >>> df = spark.createDataFrame([(1, 0, 3)], ["a", "b", "c"]) @@ -1934,8 +1884,6 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl @inherit_doc class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Class for indexing categorical feature columns in a dataset of `Vector`. This has 2 usage modes: @@ -2050,8 +1998,6 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`VectorIndexer`. Transform categorical features to use 0-based indices instead of their original values. @@ -2089,8 +2035,6 @@ class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): @inherit_doc class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - This class takes a feature vector and outputs a new feature vector with a subarray of the original features. @@ -2183,8 +2127,6 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further natural language processing or machine learning process. @@ -2352,8 +2294,6 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`Word2Vec`. .. versionadded:: 1.4.0 @@ -2383,8 +2323,6 @@ class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable): @inherit_doc class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - PCA trains a model to project vectors to a lower dimensional space of the top :py:attr:`k` principal components. @@ -2458,8 +2396,6 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space. .. versionadded:: 1.5.0 diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 8de9ad8531..d88dc75353 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -41,8 +41,6 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasSolver, HasWeightCol, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Linear regression. The learning objective is to minimize the squared error, with regularization. @@ -130,8 +128,6 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`LinearRegression`. .. versionadded:: 1.4.0 @@ -411,8 +407,6 @@ class LinearRegressionTrainingSummary(LinearRegressionSummary): class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Currently implemented using parallelized pool adjacent violators algorithm. Only univariate (single feature) algorithm supported. @@ -439,6 +433,8 @@ class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti True >>> model.predictions == model2.predictions True + + .. versionadded:: 1.6.0 """ isotonic = \ @@ -505,13 +501,13 @@ class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`IsotonicRegression`. + + .. versionadded:: 1.6.0 """ @property - @since("2.0.0") + @since("1.6.0") def boundaries(self): """ Boundaries in increasing order for which predictions are known. @@ -519,7 +515,7 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): return self._call_java("boundaries") @property - @since("2.0.0") + @since("1.6.0") def predictions(self): """ Predictions associated with the boundaries at the same index, monotone because of isotonic @@ -642,8 +638,6 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol): """ - .. note:: Experimental - `Decision tree `_ learning algorithm for regression. It supports both continuous and categorical features. @@ -727,8 +721,6 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi @inherit_doc class DecisionTreeModel(JavaModel): """ - .. note:: Experimental - Abstraction for Decision Tree models. .. versionadded:: 1.5.0 @@ -759,11 +751,9 @@ class DecisionTreeModel(JavaModel): @inherit_doc class TreeEnsembleModels(JavaModel): """ - .. note:: Experimental + (private abstraction) Represents a tree ensemble model. - - .. versionadded:: 1.5.0 """ @property @@ -803,8 +793,6 @@ class TreeEnsembleModels(JavaModel): @inherit_doc class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`DecisionTreeRegressor`. .. versionadded:: 1.4.0 @@ -837,8 +825,6 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi RandomForestParams, TreeRegressorParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Random Forest `_ learning algorithm for regression. It supports both continuous and categorical features. @@ -925,8 +911,6 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`RandomForestRegressor`. .. versionadded:: 1.4.0 @@ -959,8 +943,6 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable, JavaMLReadable, TreeRegressorParams): """ - .. note:: Experimental - `Gradient-Boosted Trees (GBTs) `_ learning algorithm for regression. It supports both continuous and categorical features. @@ -1067,8 +1049,6 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`GBTRegressor`. .. versionadded:: 1.4.0 diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index f857c5e8c8..298314d46c 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -33,8 +33,6 @@ __all__ = ['ParamGridBuilder', 'CrossValidator', 'CrossValidatorModel', 'TrainVa class ParamGridBuilder(object): r""" - .. note:: Experimental - Builder for a param grid used in grid search-based model selection. >>> from pyspark.ml.classification import LogisticRegression @@ -145,8 +143,6 @@ class ValidatorParams(HasSeed): class CrossValidator(Estimator, ValidatorParams): """ - .. note:: Experimental - K-fold cross validation. >>> from pyspark.ml.classification import LogisticRegression @@ -264,8 +260,6 @@ class CrossValidator(Estimator, ValidatorParams): class CrossValidatorModel(Model, ValidatorParams): """ - .. note:: Experimental - Model from k-fold cross validation. .. versionadded:: 1.4.0 diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 3734f87405..9f53ed0982 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -48,8 +48,6 @@ class LinearClassificationModel(LinearModel): @since('1.4.0') def setThreshold(self, value): """ - .. note:: Experimental - Sets the threshold that separates positive predictions from negative predictions. An example with prediction score greater than or equal to this threshold is identified as a positive, @@ -62,8 +60,6 @@ class LinearClassificationModel(LinearModel): @since('1.4.0') def threshold(self): """ - .. note:: Experimental - Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions. It is used for binary classification only. @@ -73,8 +69,6 @@ class LinearClassificationModel(LinearModel): @since('1.4.0') def clearThreshold(self): """ - .. note:: Experimental - Clears the threshold so that `predict` will output raw prediction scores. It is used for binary classification only. """ diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index c38c543972..c8c3c42774 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -47,8 +47,6 @@ __all__ = ['BisectingKMeansModel', 'BisectingKMeans', 'KMeansModel', 'KMeans', @inherit_doc class BisectingKMeansModel(JavaModelWrapper): """ - .. note:: Experimental - A clustering model derived from the bisecting k-means method. >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) @@ -120,8 +118,6 @@ class BisectingKMeansModel(JavaModelWrapper): class BisectingKMeans(object): """ - .. note:: Experimental - A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" by Steinbach, Karypis, and Kumar, with modification to fit Spark. @@ -366,8 +362,6 @@ class KMeans(object): class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - A clustering model derived from the Gaussian Mixture Model method. >>> from pyspark.mllib.linalg import Vectors, DenseMatrix @@ -513,8 +507,6 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): class GaussianMixture(object): """ - .. note:: Experimental - Learning algorithm for Gaussian Mixtures using the expectation-maximization algorithm. .. versionadded:: 1.3.0 @@ -565,8 +557,6 @@ class GaussianMixture(object): class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - Model produced by [[PowerIterationClustering]]. >>> import math @@ -645,8 +635,6 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): class PowerIterationClustering(object): """ - .. note:: Experimental - Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]]. From the abstract: PIC finds a very low-dimensional embedding of a @@ -693,8 +681,6 @@ class PowerIterationClustering(object): class StreamingKMeansModel(KMeansModel): """ - .. note:: Experimental - Clustering model which can perform an online update of the centroids. The update formula for each centroid is given by @@ -794,8 +780,6 @@ class StreamingKMeansModel(KMeansModel): class StreamingKMeans(object): """ - .. note:: Experimental - Provides methods to set k, decayFactor, timeUnit to configure the KMeans algorithm for fitting and predicting on incoming dstreams. More details on how the centroids are updated are provided under the diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index aef91a8ddc..c8a6e33f4d 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -60,8 +60,6 @@ class VectorTransformer(object): class Normalizer(VectorTransformer): """ - .. note:: Experimental - Normalizes samples individually to unit L\ :sup:`p`\ norm For any 1 <= `p` < float('inf'), normalizes samples using @@ -131,8 +129,6 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer): class StandardScalerModel(JavaVectorTransformer): """ - .. note:: Experimental - Represents a StandardScaler model that can transform vectors. .. versionadded:: 1.2.0 @@ -207,8 +203,6 @@ class StandardScalerModel(JavaVectorTransformer): class StandardScaler(object): """ - .. note:: Experimental - Standardizes features by removing the mean and scaling to unit variance using column summary statistics on the samples in the training set. @@ -262,8 +256,6 @@ class StandardScaler(object): class ChiSqSelectorModel(JavaVectorTransformer): """ - .. note:: Experimental - Represents a Chi Squared selector model. .. versionadded:: 1.4.0 @@ -282,8 +274,6 @@ class ChiSqSelectorModel(JavaVectorTransformer): class ChiSqSelector(object): """ - .. note:: Experimental - Creates a ChiSquared feature selector. :param numTopFeatures: number of features that selector will select. @@ -361,8 +351,6 @@ class PCA(object): class HashingTF(object): """ - .. note:: Experimental - Maps a sequence of terms to their term frequencies using the hashing trick. @@ -448,8 +436,6 @@ class IDFModel(JavaVectorTransformer): class IDF(object): """ - .. note:: Experimental - Inverse document frequency (IDF). The standard formulation is used: `idf = log((m + 1) / (d(t) + 1))`, @@ -697,8 +683,6 @@ class Word2Vec(object): class ElementwiseProduct(VectorTransformer): """ - .. note:: Experimental - Scales each column of the vector, with the supplied weight vector. i.e the elementwise product. diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index fb226e84e5..f58ea5dfb0 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -31,8 +31,6 @@ __all__ = ['FPGrowth', 'FPGrowthModel', 'PrefixSpan', 'PrefixSpanModel'] @ignore_unicode_prefix class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - A FP-Growth model for mining frequent itemsets using the Parallel FP-Growth algorithm. @@ -70,8 +68,6 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): class FPGrowth(object): """ - .. note:: Experimental - A Parallel FP-growth algorithm to mine frequent itemsets. .. versionadded:: 1.4.0 @@ -108,8 +104,6 @@ class FPGrowth(object): @ignore_unicode_prefix class PrefixSpanModel(JavaModelWrapper): """ - .. note:: Experimental - Model fitted by PrefixSpan >>> data = [ @@ -133,8 +127,6 @@ class PrefixSpanModel(JavaModelWrapper): class PrefixSpan(object): """ - .. note:: Experimental - A parallel PrefixSpan algorithm to mine frequent sequential patterns. The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 15dc53a959..9672dbde82 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -1338,8 +1338,6 @@ class Matrices(object): class QRDecomposition(object): """ - .. note:: Experimental - Represents QR factors. """ def __init__(self, Q, R): diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index ea4f27cf4f..538cada7d1 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -40,8 +40,6 @@ __all__ = ['DistributedMatrix', 'RowMatrix', 'IndexedRow', class DistributedMatrix(object): """ - .. note:: Experimental - Represents a distributively stored matrix backed by one or more RDDs. @@ -57,8 +55,6 @@ class DistributedMatrix(object): class RowMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a row-oriented distributed Matrix with no meaningful row indices. @@ -306,8 +302,6 @@ class RowMatrix(DistributedMatrix): class IndexedRow(object): """ - .. note:: Experimental - Represents a row of an IndexedRowMatrix. Just a wrapper over a (long, vector) tuple. @@ -334,8 +328,6 @@ def _convert_to_indexed_row(row): class IndexedRowMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a row-oriented distributed Matrix with indexed rows. :param rows: An RDD of IndexedRows or (long, vector) tuples. @@ -536,8 +528,6 @@ class IndexedRowMatrix(DistributedMatrix): class MatrixEntry(object): """ - .. note:: Experimental - Represents an entry of a CoordinateMatrix. Just a wrapper over a (long, long, float) tuple. @@ -566,8 +556,6 @@ def _convert_to_matrix_entry(entry): class CoordinateMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a matrix in coordinate format. :param entries: An RDD of MatrixEntry inputs or @@ -795,8 +783,6 @@ def _convert_to_matrix_block_tuple(block): class BlockMatrix(DistributedMatrix): """ - .. note:: Experimental - Represents a distributed matrix in blocks of local matrices. :param blocks: An RDD of sub-matrix blocks diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 7da921976d..3b1c5519bd 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -28,8 +28,6 @@ from pyspark.rdd import RDD class KernelDensity(object): """ - .. note:: Experimental - Estimate probability density at required points given a RDD of samples from the population. diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index b0a85240b2..67d5f0e44f 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -160,8 +160,6 @@ class Statistics(object): @ignore_unicode_prefix def chiSqTest(observed, expected=None): """ - .. note:: Experimental - If `observed` is Vector, conduct Pearson's chi-squared goodness of fit test of the observed data against the expected distribution, or againt the uniform distribution (by default), with each category @@ -246,8 +244,6 @@ class Statistics(object): @ignore_unicode_prefix def kolmogorovSmirnovTest(data, distName="norm", *params): """ - .. note:: Experimental - Performs the Kolmogorov-Smirnov (KS) test for data sampled from a continuous distribution. It tests the null hypothesis that the data is generated from a particular distribution. diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index 8be76fcefe..b3011d42e5 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -76,8 +76,6 @@ class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - .. note:: Experimental - A decision tree model for classification or regression. .. versionadded:: 1.1.0 @@ -130,8 +128,6 @@ class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader): class DecisionTree(object): """ - .. note:: Experimental - Learning algorithm for a decision tree model for classification or regression. @@ -283,8 +279,6 @@ class DecisionTree(object): @inherit_doc class RandomForestModel(TreeEnsembleModel, JavaLoader): """ - .. note:: Experimental - Represents a random forest model. .. versionadded:: 1.2.0 @@ -297,8 +291,6 @@ class RandomForestModel(TreeEnsembleModel, JavaLoader): class RandomForest(object): """ - .. note:: Experimental - Learning algorithm for a random forest model for classification or regression. @@ -486,8 +478,6 @@ class RandomForest(object): @inherit_doc class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader): """ - .. note:: Experimental - Represents a gradient-boosted tree model. .. versionadded:: 1.3.0 @@ -500,8 +490,6 @@ class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader): class GradientBoostedTrees(object): """ - .. note:: Experimental - Learning algorithm for a gradient boosted trees model for classification or regression. -- cgit v1.2.3