path: root/python/pyspark
author    Joseph K. Bradley <joseph@databricks.com>    2016-07-13 12:33:39 -0700
committer Joseph K. Bradley <joseph@databricks.com>    2016-07-13 12:33:39 -0700
commit    01f09b161217193b797c8c85969d17054c958615 (patch)
tree      40d7d4f5932157f8e0f0c13228dd18063728d4d3 /python/pyspark
parent    d8220c1e5e94abbdb9643672b918f0d748206db9 (diff)
download  spark-01f09b161217193b797c8c85969d17054c958615.tar.gz
          spark-01f09b161217193b797c8c85969d17054c958615.tar.bz2
          spark-01f09b161217193b797c8c85969d17054c958615.zip
[SPARK-14812][ML][MLLIB][PYTHON] Experimental, DeveloperApi annotation audit for ML
## What changes were proposed in this pull request?

General decisions to follow, except where noted:
* spark.mllib, pyspark.mllib: Remove all Experimental annotations. Leave DeveloperApi annotations alone.
* spark.ml, pyspark.ml
** Annotate Estimator-Model pairs of classes and companion objects the same way.
** For all algorithms marked Experimental with Since tag <= 1.6, remove Experimental annotation.
** For all algorithms marked Experimental with Since tag = 2.0, leave Experimental annotation.
* DeveloperApi annotations are left alone, except where noted.
* No changes to which types are sealed.

Exceptions where I am leaving items Experimental in spark.ml, pyspark.ml, mainly because the items are new:
* Model Summary classes
* MLWriter, MLReader, MLWritable, MLReadable
* Evaluator and subclasses: There is discussion of changes around evaluating multiple metrics at once for efficiency.
* RFormula: Its behavior may need to change slightly to match R in edge cases.
* AFTSurvivalRegression
* MultilayerPerceptronClassifier

DeveloperApi changes:
* ml.tree.Node, ml.tree.Split, and subclasses should no longer be DeveloperApi

## How was this patch tested?

N/A

Note to reviewers:
* spark.ml.clustering.LDA underwent significant changes (additional methods), so let me know if you want me to leave it Experimental.
* Be careful to check for cases where a class should no longer be Experimental but has an Experimental method, val, or other feature. I did not find such cases, but please verify.

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #14147 from jkbradley/experimental-audit.
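For context, the annotations this audit touches live in pyspark docstrings and decorators: a `.. note:: Experimental` directive at the top of a class docstring, a `.. versionadded::` directive, and `@since` decorators on members. A minimal sketch of that pattern is shown below; the class names and version numbers are hypothetical and are not part of this patch.

```python
from pyspark import since
from pyspark.ml.wrapper import JavaModel


class ExampleModel(JavaModel):
    """
    .. note:: Experimental

    Model fitted by ExampleEstimator.

    .. versionadded:: 2.0.0
    """
    # Hypothetical class for illustration only; not part of this patch.

    @property
    @since("2.0.0")
    def coefficients(self):
        """
        Coefficients of the fitted model.
        """
        return self._call_java("coefficients")
```

The patch deletes the `.. note:: Experimental` line for APIs that graduated (Since tag <= 1.6) and keeps it on 2.0-era APIs such as the summary classes, while the `.. versionadded::` and `@since` markers stay (and are corrected where the version was wrong).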
Diffstat (limited to 'python/pyspark')
-rw-r--r--  python/pyspark/ml/classification.py         24
-rwxr-xr-x  python/pyspark/ml/feature.py                64
-rw-r--r--  python/pyspark/ml/regression.py             34
-rw-r--r--  python/pyspark/ml/tuning.py                  6
-rw-r--r--  python/pyspark/mllib/classification.py       6
-rw-r--r--  python/pyspark/mllib/clustering.py          16
-rw-r--r--  python/pyspark/mllib/feature.py             16
-rw-r--r--  python/pyspark/mllib/fpm.py                  8
-rw-r--r--  python/pyspark/mllib/linalg/__init__.py      2
-rw-r--r--  python/pyspark/mllib/linalg/distributed.py  14
-rw-r--r--  python/pyspark/mllib/stat/KernelDensity.py   2
-rw-r--r--  python/pyspark/mllib/stat/_statistics.py     4
-rw-r--r--  python/pyspark/mllib/tree.py                12
13 files changed, 11 insertions(+), 197 deletions(-)
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index c035942f73..3c4af90aca 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -49,8 +49,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds,
HasWeightCol, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Logistic regression.
Currently, this class only supports binary classification.
@@ -216,8 +214,6 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by LogisticRegression.
.. versionadded:: 1.3.0
@@ -277,6 +273,8 @@ class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
class LogisticRegressionSummary(JavaWrapper):
"""
+ .. note:: Experimental
+
Abstraction for Logistic Regression Results for a given model.
.. versionadded:: 2.0.0
@@ -321,6 +319,8 @@ class LogisticRegressionSummary(JavaWrapper):
@inherit_doc
class LogisticRegressionTrainingSummary(LogisticRegressionSummary):
"""
+ .. note:: Experimental
+
Abstraction for multinomial Logistic Regression Training results.
Currently, the training summary ignores the training weights except
for the objective trace.
@@ -501,8 +501,6 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable,
JavaMLReadable):
"""
- .. note:: Experimental
-
`Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
learning algorithm for classification.
It supports both binary and multiclass labels, as well as both continuous and categorical
@@ -599,8 +597,6 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
@inherit_doc
class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by DecisionTreeClassifier.
.. versionadded:: 1.4.0
@@ -634,8 +630,6 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
RandomForestParams, TreeClassifierParams, HasCheckpointInterval,
JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
`Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
learning algorithm for classification.
It supports both binary and multiclass labels, as well as both continuous and categorical
@@ -730,8 +724,6 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
class RandomForestClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by RandomForestClassifier.
.. versionadded:: 1.4.0
@@ -764,8 +756,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
JavaMLReadable):
"""
- .. note:: Experimental
-
`Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
learning algorithm for classification.
It supports binary labels, as well as both continuous and categorical features.
@@ -885,8 +875,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by GBTClassifier.
.. versionadded:: 1.4.0
@@ -918,8 +906,6 @@ class GBTClassificationModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable)
class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol,
HasRawPredictionCol, HasThresholds, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Naive Bayes Classifiers.
It supports both Multinomial and Bernoulli NB. `Multinomial NB
<http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html>`_
@@ -1043,8 +1029,6 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by NaiveBayes.
.. versionadded:: 1.5.0
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index bbbb94f9a0..2881380152 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -60,8 +60,6 @@ __all__ = ['Binarizer',
@inherit_doc
class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Binarize a column of continuous features given a threshold.
>>> df = spark.createDataFrame([(0.5,)], ["values"])
@@ -125,8 +123,6 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
@inherit_doc
class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Maps a column of continuous features to a column of feature buckets.
>>> df = spark.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
@@ -200,8 +196,6 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
@inherit_doc
class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Extracts a vocabulary from document collections and generates a :py:attr:`CountVectorizerModel`.
>>> df = spark.createDataFrame(
@@ -348,8 +342,6 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable,
class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`CountVectorizer`.
.. versionadded:: 1.6.0
@@ -367,8 +359,6 @@ class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable):
@inherit_doc
class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A feature transformer that takes the 1D discrete cosine transform
of a real vector. No zero padding is performed on the input vector.
It returns a real vector of the same length representing the DCT.
@@ -439,8 +429,6 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit
class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
Outputs the Hadamard product (i.e., the element-wise product) of each input vector
with a provided "weight" vector. In other words, it scales each column of the dataset
by a scalar multiplier.
@@ -505,8 +493,6 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada
class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
Maps a sequence of terms to their term frequencies using the hashing trick.
Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32)
to calculate the hash code value for the term object.
@@ -576,8 +562,6 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, Java
@inherit_doc
class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Compute the Inverse Document Frequency (IDF) given a collection of documents.
>>> from pyspark.ml.linalg import DenseVector
@@ -653,8 +637,6 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`IDF`.
.. versionadded:: 1.4.0
@@ -752,8 +734,6 @@ class MaxAbsScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
@inherit_doc
class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Rescale each feature individually to a common range [min, max] linearly using column summary
statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
feature E is calculated as,
@@ -859,8 +839,6 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav
class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`MinMaxScaler`.
.. versionadded:: 1.6.0
@@ -887,8 +865,6 @@ class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
@ignore_unicode_prefix
class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A feature transformer that converts the input array of strings into an array of n-grams. Null
values in the input array are ignored.
It returns an array of n-grams where each n-gram is represented by a space-separated string of
@@ -965,8 +941,6 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr
@inherit_doc
class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Normalize a vector to have unit norm using the given p-norm.
>>> from pyspark.ml.linalg import Vectors
@@ -1031,8 +1005,6 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
@inherit_doc
class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A one-hot encoder that maps a column of category indices to a
column of binary vectors, with at most a single one-value per row
that indicates the input category index.
@@ -1114,8 +1086,6 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
Perform feature expansion in a polynomial space. As said in `wikipedia of Polynomial Expansion
<http://en.wikipedia.org/wiki/Polynomial_expansion>`_, "In mathematics, an
expansion of a product of sums expresses it as a sum of products by using the fact that
@@ -1287,8 +1257,6 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadab
@ignore_unicode_prefix
class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A regex based tokenizer that extracts tokens either by using the
provided regex pattern (in Java dialect) to split the text
(default) or repeatedly matching the regex (if gaps is false).
@@ -1418,8 +1386,6 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
@inherit_doc
class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Implements the transforms which are defined by SQL statement.
Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
where '__THIS__' represents the underlying table of the input dataset.
@@ -1479,8 +1445,6 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable):
@inherit_doc
class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Standardizes features by removing the mean and scaling to unit variance using column summary
statistics on the samples in the training set.
@@ -1576,8 +1540,6 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J
class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`StandardScaler`.
.. versionadded:: 1.4.0
@@ -1604,8 +1566,6 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
A label indexer that maps a string column of labels to an ML column of label indices.
If the input column is numeric, we cast it to string and index the string values.
The indices are in [0, numLabels), ordered by label frequencies.
@@ -1668,8 +1628,6 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`StringIndexer`.
.. versionadded:: 1.4.0
@@ -1687,8 +1645,6 @@ class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
@inherit_doc
class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A :py:class:`Transformer` that maps a column of indices back to a new column of
corresponding string values.
The index-string mapping is either from the ML attributes of the input column,
@@ -1741,8 +1697,6 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A feature transformer that filters out stop words from input.
Note: null values from input array are preserved unless adding null to stopWords explicitly.
@@ -1833,8 +1787,6 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadabl
@ignore_unicode_prefix
class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A tokenizer that converts the input string to lowercase and then
splits it by white spaces.
@@ -1888,8 +1840,6 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
@inherit_doc
class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
A feature transformer that merges multiple columns into a vector column.
>>> df = spark.createDataFrame([(1, 0, 3)], ["a", "b", "c"])
@@ -1934,8 +1884,6 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl
@inherit_doc
class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Class for indexing categorical feature columns in a dataset of `Vector`.
This has 2 usage modes:
@@ -2050,8 +1998,6 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`VectorIndexer`.
Transform categorical features to use 0-based indices instead of their original values.
@@ -2089,8 +2035,6 @@ class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
@inherit_doc
class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
This class takes a feature vector and outputs a new feature vector with a subarray
of the original features.
@@ -2183,8 +2127,6 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J
class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further
natural language processing or machine learning process.
@@ -2352,8 +2294,6 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`Word2Vec`.
.. versionadded:: 1.4.0
@@ -2383,8 +2323,6 @@ class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable):
@inherit_doc
class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
PCA trains a model to project vectors to a lower dimensional space of the
top :py:attr:`k` principal components.
@@ -2458,8 +2396,6 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space.
.. versionadded:: 1.5.0
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 8de9ad8531..d88dc75353 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -41,8 +41,6 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept,
HasStandardization, HasSolver, HasWeightCol, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Linear regression.
The learning objective is to minimize the squared error, with regularization.
@@ -130,8 +128,6 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by :class:`LinearRegression`.
.. versionadded:: 1.4.0
@@ -411,8 +407,6 @@ class LinearRegressionTrainingSummary(LinearRegressionSummary):
class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
HasWeightCol, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Currently implemented using parallelized pool adjacent violators algorithm.
Only univariate (single feature) algorithm supported.
@@ -439,6 +433,8 @@ class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
True
>>> model.predictions == model2.predictions
True
+
+ .. versionadded:: 1.6.0
"""
isotonic = \
@@ -505,13 +501,13 @@ class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by :class:`IsotonicRegression`.
+
+ .. versionadded:: 1.6.0
"""
@property
- @since("2.0.0")
+ @since("1.6.0")
def boundaries(self):
"""
Boundaries in increasing order for which predictions are known.
@@ -519,7 +515,7 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
return self._call_java("boundaries")
@property
- @since("2.0.0")
+ @since("1.6.0")
def predictions(self):
"""
Predictions associated with the boundaries at the same index, monotone because of isotonic
@@ -642,8 +638,6 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval,
HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol):
"""
- .. note:: Experimental
-
`Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
learning algorithm for regression.
It supports both continuous and categorical features.
@@ -727,8 +721,6 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
@inherit_doc
class DecisionTreeModel(JavaModel):
"""
- .. note:: Experimental
-
Abstraction for Decision Tree models.
.. versionadded:: 1.5.0
@@ -759,11 +751,9 @@ class DecisionTreeModel(JavaModel):
@inherit_doc
class TreeEnsembleModels(JavaModel):
"""
- .. note:: Experimental
+ (private abstraction)
Represents a tree ensemble model.
-
- .. versionadded:: 1.5.0
"""
@property
@@ -803,8 +793,6 @@ class TreeEnsembleModels(JavaModel):
@inherit_doc
class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by :class:`DecisionTreeRegressor`.
.. versionadded:: 1.4.0
@@ -837,8 +825,6 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
RandomForestParams, TreeRegressorParams, HasCheckpointInterval,
JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
`Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
learning algorithm for regression.
It supports both continuous and categorical features.
@@ -925,8 +911,6 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
class RandomForestRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by :class:`RandomForestRegressor`.
.. versionadded:: 1.4.0
@@ -959,8 +943,6 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
JavaMLReadable, TreeRegressorParams):
"""
- .. note:: Experimental
-
`Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
learning algorithm for regression.
It supports both continuous and categorical features.
@@ -1067,8 +1049,6 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
class GBTRegressionModel(TreeEnsembleModels, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by :class:`GBTRegressor`.
.. versionadded:: 1.4.0
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index f857c5e8c8..298314d46c 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -33,8 +33,6 @@ __all__ = ['ParamGridBuilder', 'CrossValidator', 'CrossValidatorModel', 'TrainVa
class ParamGridBuilder(object):
r"""
- .. note:: Experimental
-
Builder for a param grid used in grid search-based model selection.
>>> from pyspark.ml.classification import LogisticRegression
@@ -145,8 +143,6 @@ class ValidatorParams(HasSeed):
class CrossValidator(Estimator, ValidatorParams):
"""
- .. note:: Experimental
-
K-fold cross validation.
>>> from pyspark.ml.classification import LogisticRegression
@@ -264,8 +260,6 @@ class CrossValidator(Estimator, ValidatorParams):
class CrossValidatorModel(Model, ValidatorParams):
"""
- .. note:: Experimental
-
Model from k-fold cross validation.
.. versionadded:: 1.4.0
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 3734f87405..9f53ed0982 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -48,8 +48,6 @@ class LinearClassificationModel(LinearModel):
@since('1.4.0')
def setThreshold(self, value):
"""
- .. note:: Experimental
-
Sets the threshold that separates positive predictions from
negative predictions. An example with prediction score greater
than or equal to this threshold is identified as a positive,
@@ -62,8 +60,6 @@ class LinearClassificationModel(LinearModel):
@since('1.4.0')
def threshold(self):
"""
- .. note:: Experimental
-
Returns the threshold (if any) used for converting raw
prediction scores into 0/1 predictions. It is used for
binary classification only.
@@ -73,8 +69,6 @@ class LinearClassificationModel(LinearModel):
@since('1.4.0')
def clearThreshold(self):
"""
- .. note:: Experimental
-
Clears the threshold so that `predict` will output raw
prediction scores. It is used for binary classification only.
"""
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index c38c543972..c8c3c42774 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -47,8 +47,6 @@ __all__ = ['BisectingKMeansModel', 'BisectingKMeans', 'KMeansModel', 'KMeans',
@inherit_doc
class BisectingKMeansModel(JavaModelWrapper):
"""
- .. note:: Experimental
-
A clustering model derived from the bisecting k-means method.
>>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2)
@@ -120,8 +118,6 @@ class BisectingKMeansModel(JavaModelWrapper):
class BisectingKMeans(object):
"""
- .. note:: Experimental
-
A bisecting k-means algorithm based on the paper "A comparison of
document clustering techniques" by Steinbach, Karypis, and Kumar,
with modification to fit Spark.
@@ -366,8 +362,6 @@ class KMeans(object):
class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
A clustering model derived from the Gaussian Mixture Model method.
>>> from pyspark.mllib.linalg import Vectors, DenseMatrix
@@ -513,8 +507,6 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class GaussianMixture(object):
"""
- .. note:: Experimental
-
Learning algorithm for Gaussian Mixtures using the expectation-maximization algorithm.
.. versionadded:: 1.3.0
@@ -565,8 +557,6 @@ class GaussianMixture(object):
class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
Model produced by [[PowerIterationClustering]].
>>> import math
@@ -645,8 +635,6 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class PowerIterationClustering(object):
"""
- .. note:: Experimental
-
Power Iteration Clustering (PIC), a scalable graph clustering algorithm
developed by [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]].
From the abstract: PIC finds a very low-dimensional embedding of a
@@ -693,8 +681,6 @@ class PowerIterationClustering(object):
class StreamingKMeansModel(KMeansModel):
"""
- .. note:: Experimental
-
Clustering model which can perform an online update of the centroids.
The update formula for each centroid is given by
@@ -794,8 +780,6 @@ class StreamingKMeansModel(KMeansModel):
class StreamingKMeans(object):
"""
- .. note:: Experimental
-
Provides methods to set k, decayFactor, timeUnit to configure the
KMeans algorithm for fitting and predicting on incoming dstreams.
More details on how the centroids are updated are provided under the
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index aef91a8ddc..c8a6e33f4d 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -60,8 +60,6 @@ class VectorTransformer(object):
class Normalizer(VectorTransformer):
"""
- .. note:: Experimental
-
Normalizes samples individually to unit L\ :sup:`p`\ norm
For any 1 <= `p` < float('inf'), normalizes samples using
@@ -131,8 +129,6 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer):
class StandardScalerModel(JavaVectorTransformer):
"""
- .. note:: Experimental
-
Represents a StandardScaler model that can transform vectors.
.. versionadded:: 1.2.0
@@ -207,8 +203,6 @@ class StandardScalerModel(JavaVectorTransformer):
class StandardScaler(object):
"""
- .. note:: Experimental
-
Standardizes features by removing the mean and scaling to unit
variance using column summary statistics on the samples in the
training set.
@@ -262,8 +256,6 @@ class StandardScaler(object):
class ChiSqSelectorModel(JavaVectorTransformer):
"""
- .. note:: Experimental
-
Represents a Chi Squared selector model.
.. versionadded:: 1.4.0
@@ -282,8 +274,6 @@ class ChiSqSelectorModel(JavaVectorTransformer):
class ChiSqSelector(object):
"""
- .. note:: Experimental
-
Creates a ChiSquared feature selector.
:param numTopFeatures: number of features that selector will select.
@@ -361,8 +351,6 @@ class PCA(object):
class HashingTF(object):
"""
- .. note:: Experimental
-
Maps a sequence of terms to their term frequencies using the hashing
trick.
@@ -448,8 +436,6 @@ class IDFModel(JavaVectorTransformer):
class IDF(object):
"""
- .. note:: Experimental
-
Inverse document frequency (IDF).
The standard formulation is used: `idf = log((m + 1) / (d(t) + 1))`,
@@ -697,8 +683,6 @@ class Word2Vec(object):
class ElementwiseProduct(VectorTransformer):
"""
- .. note:: Experimental
-
Scales each column of the vector, with the supplied weight vector.
i.e the elementwise product.
diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py
index fb226e84e5..f58ea5dfb0 100644
--- a/python/pyspark/mllib/fpm.py
+++ b/python/pyspark/mllib/fpm.py
@@ -31,8 +31,6 @@ __all__ = ['FPGrowth', 'FPGrowthModel', 'PrefixSpan', 'PrefixSpanModel']
@ignore_unicode_prefix
class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
A FP-Growth model for mining frequent itemsets
using the Parallel FP-Growth algorithm.
@@ -70,8 +68,6 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class FPGrowth(object):
"""
- .. note:: Experimental
-
A Parallel FP-growth algorithm to mine frequent itemsets.
.. versionadded:: 1.4.0
@@ -108,8 +104,6 @@ class FPGrowth(object):
@ignore_unicode_prefix
class PrefixSpanModel(JavaModelWrapper):
"""
- .. note:: Experimental
-
Model fitted by PrefixSpan
>>> data = [
@@ -133,8 +127,6 @@ class PrefixSpanModel(JavaModelWrapper):
class PrefixSpan(object):
"""
- .. note:: Experimental
-
A parallel PrefixSpan algorithm to mine frequent sequential patterns.
The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan:
Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 15dc53a959..9672dbde82 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -1338,8 +1338,6 @@ class Matrices(object):
class QRDecomposition(object):
"""
- .. note:: Experimental
-
Represents QR factors.
"""
def __init__(self, Q, R):
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index ea4f27cf4f..538cada7d1 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -40,8 +40,6 @@ __all__ = ['DistributedMatrix', 'RowMatrix', 'IndexedRow',
class DistributedMatrix(object):
"""
- .. note:: Experimental
-
Represents a distributively stored matrix backed by one or
more RDDs.
@@ -57,8 +55,6 @@ class DistributedMatrix(object):
class RowMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a row-oriented distributed Matrix with no meaningful
row indices.
@@ -306,8 +302,6 @@ class RowMatrix(DistributedMatrix):
class IndexedRow(object):
"""
- .. note:: Experimental
-
Represents a row of an IndexedRowMatrix.
Just a wrapper over a (long, vector) tuple.
@@ -334,8 +328,6 @@ def _convert_to_indexed_row(row):
class IndexedRowMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a row-oriented distributed Matrix with indexed rows.
:param rows: An RDD of IndexedRows or (long, vector) tuples.
@@ -536,8 +528,6 @@ class IndexedRowMatrix(DistributedMatrix):
class MatrixEntry(object):
"""
- .. note:: Experimental
-
Represents an entry of a CoordinateMatrix.
Just a wrapper over a (long, long, float) tuple.
@@ -566,8 +556,6 @@ def _convert_to_matrix_entry(entry):
class CoordinateMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a matrix in coordinate format.
:param entries: An RDD of MatrixEntry inputs or
@@ -795,8 +783,6 @@ def _convert_to_matrix_block_tuple(block):
class BlockMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a distributed matrix in blocks of local matrices.
:param blocks: An RDD of sub-matrix blocks
diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py
index 7da921976d..3b1c5519bd 100644
--- a/python/pyspark/mllib/stat/KernelDensity.py
+++ b/python/pyspark/mllib/stat/KernelDensity.py
@@ -28,8 +28,6 @@ from pyspark.rdd import RDD
class KernelDensity(object):
"""
- .. note:: Experimental
-
Estimate probability density at required points given a RDD of samples
from the population.
diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py
index b0a85240b2..67d5f0e44f 100644
--- a/python/pyspark/mllib/stat/_statistics.py
+++ b/python/pyspark/mllib/stat/_statistics.py
@@ -160,8 +160,6 @@ class Statistics(object):
@ignore_unicode_prefix
def chiSqTest(observed, expected=None):
"""
- .. note:: Experimental
-
If `observed` is Vector, conduct Pearson's chi-squared goodness
of fit test of the observed data against the expected distribution,
or against the uniform distribution (by default), with each category
@@ -246,8 +244,6 @@ class Statistics(object):
@ignore_unicode_prefix
def kolmogorovSmirnovTest(data, distName="norm", *params):
"""
- .. note:: Experimental
-
Performs the Kolmogorov-Smirnov (KS) test for data sampled from
a continuous distribution. It tests the null hypothesis that
the data is generated from a particular distribution.
diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py
index 8be76fcefe..b3011d42e5 100644
--- a/python/pyspark/mllib/tree.py
+++ b/python/pyspark/mllib/tree.py
@@ -76,8 +76,6 @@ class TreeEnsembleModel(JavaModelWrapper, JavaSaveable):
class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
A decision tree model for classification or regression.
.. versionadded:: 1.1.0
@@ -130,8 +128,6 @@ class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class DecisionTree(object):
"""
- .. note:: Experimental
-
Learning algorithm for a decision tree model for classification or
regression.
@@ -283,8 +279,6 @@ class DecisionTree(object):
@inherit_doc
class RandomForestModel(TreeEnsembleModel, JavaLoader):
"""
- .. note:: Experimental
-
Represents a random forest model.
.. versionadded:: 1.2.0
@@ -297,8 +291,6 @@ class RandomForestModel(TreeEnsembleModel, JavaLoader):
class RandomForest(object):
"""
- .. note:: Experimental
-
Learning algorithm for a random forest model for classification or
regression.
@@ -486,8 +478,6 @@ class RandomForest(object):
@inherit_doc
class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader):
"""
- .. note:: Experimental
-
Represents a gradient-boosted tree model.
.. versionadded:: 1.3.0
@@ -500,8 +490,6 @@ class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader):
class GradientBoostedTrees(object):
"""
- .. note:: Experimental
-
Learning algorithm for a gradient boosted trees model for
classification or regression.