From 01f09b161217193b797c8c85969d17054c958615 Mon Sep 17 00:00:00 2001
From: "Joseph K. Bradley" <joseph@databricks.com>
Date: Wed, 13 Jul 2016 12:33:39 -0700
Subject: [SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi
 annotation audit for ML

## What changes were proposed in this pull request?

General decisions to follow, except where noted:
* spark.mllib, pyspark.mllib: Remove all Experimental annotations.  Leave DeveloperApi annotations alone.
* spark.ml, pyspark.ml
** Annotate Estimator-Model pairs of classes and companion objects the same way.
** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation.
** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation.
* DeveloperApi annotations are left alone, except where noted.
* No changes to which types are sealed.

Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new:
* Model Summary classes
* MLWriter, MLReader, MLWritable, MLReadable
* Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency.
* RFormula: Its behavior may need to change slightly to match R in edge cases.
* AFTSurvivalRegression
* MultilayerPerceptronClassifier

DeveloperApi changes:
* ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi

## How was this patch tested?

N/A

Note to reviewers:
* spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental.
* Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature.  I did not find such cases, but please verify.

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #14147 from jkbradley/experimental-audit.
---
 python/pyspark/ml/feature.py | 64 --------------------------------------------
 1 file changed, 64 deletions(-)

(limited to 'python/pyspark/ml/feature.py')

diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index bbbb94f9a0..2881380152 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -60,8 +60,6 @@ __all__ = ['Binarizer',
 @inherit_doc
 class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Binarize a column of continuous features given a threshold.
 
     >>> df = spark.createDataFrame([(0.5,)], ["values"])
@@ -125,8 +123,6 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
 @inherit_doc
 class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Maps a column of continuous features to a column of feature buckets.
 
     >>> df = spark.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
@@ -200,8 +196,6 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
 @inherit_doc
 class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Extracts a vocabulary from document collections and generates a :py:attr:`CountVectorizerModel`.
 
     >>> df = spark.createDataFrame(
@@ -348,8 +342,6 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable,
 
 class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`CountVectorizer`.
 
     .. versionadded:: 1.6.0
@@ -367,8 +359,6 @@ class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable):
 @inherit_doc
 class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A feature transformer that takes the 1D discrete cosine transform
     of a real vector. No zero padding is performed on the input vector.
     It returns a real vector of the same length representing the DCT.
@@ -439,8 +429,6 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit
 class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
                          JavaMLWritable):
     """
-    .. note:: Experimental
-
     Outputs the Hadamard product (i.e., the element-wise product) of each input vector
     with a provided "weight" vector. In other words, it scales each column of the dataset
     by a scalar multiplier.
@@ -505,8 +493,6 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada
 class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, JavaMLReadable,
                 JavaMLWritable):
     """
-    .. note:: Experimental
-
     Maps a sequence of terms to their term frequencies using the hashing trick.
     Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32)
     to calculate the hash code value for the term object.
@@ -576,8 +562,6 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, Java
 @inherit_doc
 class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Compute the Inverse Document Frequency (IDF) given a collection of documents.
 
     >>> from pyspark.ml.linalg import DenseVector
@@ -653,8 +637,6 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
 
 class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`IDF`.
 
     .. versionadded:: 1.4.0
@@ -752,8 +734,6 @@ class MaxAbsScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
 @inherit_doc
 class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Rescale each feature individually to a common range [min, max] linearly using column summary
     statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
     feature E is calculated as,
@@ -859,8 +839,6 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav
 
 class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`MinMaxScaler`.
 
     .. versionadded:: 1.6.0
@@ -887,8 +865,6 @@ class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
 @ignore_unicode_prefix
 class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A feature transformer that converts the input array of strings into an array of n-grams. Null
     values in the input array are ignored.
     It returns an array of n-grams where each n-gram is represented by a space-separated string of
@@ -965,8 +941,6 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr
 @inherit_doc
 class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
      Normalize a vector to have unit norm using the given p-norm.
 
     >>> from pyspark.ml.linalg import Vectors
@@ -1031,8 +1005,6 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
 @inherit_doc
 class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A one-hot encoder that maps a column of category indices to a
     column of binary vectors, with at most a single one-value per row
     that indicates the input category index.
@@ -1114,8 +1086,6 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
 class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
                           JavaMLWritable):
     """
-    .. note:: Experimental
-
     Perform feature expansion in a polynomial space. As said in `wikipedia of Polynomial Expansion
     <http://en.wikipedia.org/wiki/Polynomial_expansion>`_, "In mathematics, an
     expansion of a product of sums expresses it as a sum of products by using the fact that
@@ -1287,8 +1257,6 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadab
 @ignore_unicode_prefix
 class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A regex based tokenizer that extracts tokens either by using the
     provided regex pattern (in Java dialect) to split the text
     (default) or repeatedly matching the regex (if gaps is false).
@@ -1418,8 +1386,6 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
 @inherit_doc
 class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Implements the transforms which are defined by SQL statement.
     Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
     where '__THIS__' represents the underlying table of the input dataset.
@@ -1479,8 +1445,6 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable):
 @inherit_doc
 class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Standardizes features by removing the mean and scaling to unit variance using column summary
     statistics on the samples in the training set.
 
@@ -1576,8 +1540,6 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J
 
 class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`StandardScaler`.
 
     .. versionadded:: 1.4.0
@@ -1604,8 +1566,6 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
 class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, JavaMLReadable,
                     JavaMLWritable):
     """
-    .. note:: Experimental
-
     A label indexer that maps a string column of labels to an ML column of label indices.
     If the input column is numeric, we cast it to string and index the string values.
     The indices are in [0, numLabels), ordered by label frequencies.
@@ -1668,8 +1628,6 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
 
 class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`StringIndexer`.
 
     .. versionadded:: 1.4.0
@@ -1687,8 +1645,6 @@ class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
 @inherit_doc
 class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A :py:class:`Transformer` that maps a column of indices back to a new column of
     corresponding string values.
     The index-string mapping is either from the ML attributes of the input column,
@@ -1741,8 +1697,6 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
 
 class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A feature transformer that filters out stop words from input.
     Note: null values from input array are preserved unless adding null to stopWords explicitly.
 
@@ -1833,8 +1787,6 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadabl
 @ignore_unicode_prefix
 class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A tokenizer that converts the input string to lowercase and then
     splits it by white spaces.
 
@@ -1888,8 +1840,6 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
 @inherit_doc
 class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     A feature transformer that merges multiple columns into a vector column.
 
     >>> df = spark.createDataFrame([(1, 0, 3)], ["a", "b", "c"])
@@ -1934,8 +1884,6 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl
 @inherit_doc
 class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Class for indexing categorical feature columns in a dataset of `Vector`.
 
     This has 2 usage modes:
@@ -2050,8 +1998,6 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
 
 class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`VectorIndexer`.
 
     Transform categorical features to use 0-based indices instead of their original values.
@@ -2089,8 +2035,6 @@ class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
 @inherit_doc
 class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     This class takes a feature vector and outputs a new feature vector with a subarray
     of the original features.
 
@@ -2183,8 +2127,6 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J
 class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol,
                JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further
     natural language processing or machine learning process.
 
@@ -2352,8 +2294,6 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
 
 class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`Word2Vec`.
 
     .. versionadded:: 1.4.0
@@ -2383,8 +2323,6 @@ class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable):
 @inherit_doc
 class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     PCA trains a model to project vectors to a lower dimensional space of the
     top :py:attr:`k` principal components.
 
@@ -2458,8 +2396,6 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
 
 class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable):
     """
-    .. note:: Experimental
-
     Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space.
 
     .. versionadded:: 1.5.0
-- 
cgit v1.2.3