about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- python/pyspark/mllib/classification.py 6
-rw-r--r-- python/pyspark/mllib/clustering.py 16
-rw-r--r-- python/pyspark/mllib/feature.py 16
-rw-r--r-- python/pyspark/mllib/fpm.py 8
-rw-r--r-- python/pyspark/mllib/linalg/__init__.py 2
-rw-r--r-- python/pyspark/mllib/linalg/distributed.py 14
-rw-r--r-- python/pyspark/mllib/stat/KernelDensity.py 2
-rw-r--r-- python/pyspark/mllib/stat/_statistics.py 4
-rw-r--r-- python/pyspark/mllib/tree.py 12
9 files changed, 0 insertions, 80 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 3734f87405..9f53ed0982 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -48,8 +48,6 @@ class LinearClassificationModel(LinearModel):
@since('1.4.0')
def setThreshold(self, value):
"""
- .. note:: Experimental
-
Sets the threshold that separates positive predictions from
negative predictions. An example with prediction score greater
than or equal to this threshold is identified as a positive,
@@ -62,8 +60,6 @@ class LinearClassificationModel(LinearModel):
@since('1.4.0')
def threshold(self):
"""
- .. note:: Experimental
-
Returns the threshold (if any) used for converting raw
prediction scores into 0/1 predictions. It is used for
binary classification only.
@@ -73,8 +69,6 @@ class LinearClassificationModel(LinearModel):
@since('1.4.0')
def clearThreshold(self):
"""
- .. note:: Experimental
-
Clears the threshold so that `predict` will output raw
prediction scores. It is used for binary classification only.
"""
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index c38c543972..c8c3c42774 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -47,8 +47,6 @@ __all__ = ['BisectingKMeansModel', 'BisectingKMeans', 'KMeansModel', 'KMeans',
@inherit_doc
class BisectingKMeansModel(JavaModelWrapper):
"""
- .. note:: Experimental
-
A clustering model derived from the bisecting k-means method.
>>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2)
@@ -120,8 +118,6 @@ class BisectingKMeansModel(JavaModelWrapper):
class BisectingKMeans(object):
"""
- .. note:: Experimental
-
A bisecting k-means algorithm based on the paper "A comparison of
document clustering techniques" by Steinbach, Karypis, and Kumar,
with modification to fit Spark.
@@ -366,8 +362,6 @@ class KMeans(object):
class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
A clustering model derived from the Gaussian Mixture Model method.
>>> from pyspark.mllib.linalg import Vectors, DenseMatrix
@@ -513,8 +507,6 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class GaussianMixture(object):
"""
- .. note:: Experimental
-
Learning algorithm for Gaussian Mixtures using the expectation-maximization algorithm.
.. versionadded:: 1.3.0
@@ -565,8 +557,6 @@ class GaussianMixture(object):
class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
Model produced by [[PowerIterationClustering]].
>>> import math
@@ -645,8 +635,6 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class PowerIterationClustering(object):
"""
- .. note:: Experimental
-
Power Iteration Clustering (PIC), a scalable graph clustering algorithm
developed by [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]].
From the abstract: PIC finds a very low-dimensional embedding of a
@@ -693,8 +681,6 @@ class PowerIterationClustering(object):
class StreamingKMeansModel(KMeansModel):
"""
- .. note:: Experimental
-
Clustering model which can perform an online update of the centroids.
The update formula for each centroid is given by
@@ -794,8 +780,6 @@ class StreamingKMeansModel(KMeansModel):
class StreamingKMeans(object):
"""
- .. note:: Experimental
-
Provides methods to set k, decayFactor, timeUnit to configure the
KMeans algorithm for fitting and predicting on incoming dstreams.
More details on how the centroids are updated are provided under the
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index aef91a8ddc..c8a6e33f4d 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -60,8 +60,6 @@ class VectorTransformer(object):
class Normalizer(VectorTransformer):
"""
- .. note:: Experimental
-
Normalizes samples individually to unit L\ :sup:`p`\ norm
For any 1 <= `p` < float('inf'), normalizes samples using
@@ -131,8 +129,6 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer):
class StandardScalerModel(JavaVectorTransformer):
"""
- .. note:: Experimental
-
Represents a StandardScaler model that can transform vectors.
.. versionadded:: 1.2.0
@@ -207,8 +203,6 @@ class StandardScalerModel(JavaVectorTransformer):
class StandardScaler(object):
"""
- .. note:: Experimental
-
Standardizes features by removing the mean and scaling to unit
variance using column summary statistics on the samples in the
training set.
@@ -262,8 +256,6 @@ class StandardScaler(object):
class ChiSqSelectorModel(JavaVectorTransformer):
"""
- .. note:: Experimental
-
Represents a Chi Squared selector model.
.. versionadded:: 1.4.0
@@ -282,8 +274,6 @@ class ChiSqSelectorModel(JavaVectorTransformer):
class ChiSqSelector(object):
"""
- .. note:: Experimental
-
Creates a ChiSquared feature selector.
:param numTopFeatures: number of features that selector will select.
@@ -361,8 +351,6 @@ class PCA(object):
class HashingTF(object):
"""
- .. note:: Experimental
-
Maps a sequence of terms to their term frequencies using the hashing
trick.
@@ -448,8 +436,6 @@ class IDFModel(JavaVectorTransformer):
class IDF(object):
"""
- .. note:: Experimental
-
Inverse document frequency (IDF).
The standard formulation is used: `idf = log((m + 1) / (d(t) + 1))`,
@@ -697,8 +683,6 @@ class Word2Vec(object):
class ElementwiseProduct(VectorTransformer):
"""
- .. note:: Experimental
-
Scales each column of the vector, with the supplied weight vector.
i.e. the elementwise product.
diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py
index fb226e84e5..f58ea5dfb0 100644
--- a/python/pyspark/mllib/fpm.py
+++ b/python/pyspark/mllib/fpm.py
@@ -31,8 +31,6 @@ __all__ = ['FPGrowth', 'FPGrowthModel', 'PrefixSpan', 'PrefixSpanModel']
@ignore_unicode_prefix
class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
A FP-Growth model for mining frequent itemsets
using the Parallel FP-Growth algorithm.
@@ -70,8 +68,6 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class FPGrowth(object):
"""
- .. note:: Experimental
-
A Parallel FP-growth algorithm to mine frequent itemsets.
.. versionadded:: 1.4.0
@@ -108,8 +104,6 @@ class FPGrowth(object):
@ignore_unicode_prefix
class PrefixSpanModel(JavaModelWrapper):
"""
- .. note:: Experimental
-
Model fitted by PrefixSpan
>>> data = [
@@ -133,8 +127,6 @@ class PrefixSpanModel(JavaModelWrapper):
class PrefixSpan(object):
"""
- .. note:: Experimental
-
A parallel PrefixSpan algorithm to mine frequent sequential patterns.
The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan:
Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 15dc53a959..9672dbde82 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -1338,8 +1338,6 @@ class Matrices(object):
class QRDecomposition(object):
"""
- .. note:: Experimental
-
Represents QR factors.
"""
def __init__(self, Q, R):
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index ea4f27cf4f..538cada7d1 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -40,8 +40,6 @@ __all__ = ['DistributedMatrix', 'RowMatrix', 'IndexedRow',
class DistributedMatrix(object):
"""
- .. note:: Experimental
-
Represents a distributively stored matrix backed by one or
more RDDs.
@@ -57,8 +55,6 @@ class DistributedMatrix(object):
class RowMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a row-oriented distributed Matrix with no meaningful
row indices.
@@ -306,8 +302,6 @@ class RowMatrix(DistributedMatrix):
class IndexedRow(object):
"""
- .. note:: Experimental
-
Represents a row of an IndexedRowMatrix.
Just a wrapper over a (long, vector) tuple.
@@ -334,8 +328,6 @@ def _convert_to_indexed_row(row):
class IndexedRowMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a row-oriented distributed Matrix with indexed rows.
:param rows: An RDD of IndexedRows or (long, vector) tuples.
@@ -536,8 +528,6 @@ class IndexedRowMatrix(DistributedMatrix):
class MatrixEntry(object):
"""
- .. note:: Experimental
-
Represents an entry of a CoordinateMatrix.
Just a wrapper over a (long, long, float) tuple.
@@ -566,8 +556,6 @@ def _convert_to_matrix_entry(entry):
class CoordinateMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a matrix in coordinate format.
:param entries: An RDD of MatrixEntry inputs or
@@ -795,8 +783,6 @@ def _convert_to_matrix_block_tuple(block):
class BlockMatrix(DistributedMatrix):
"""
- .. note:: Experimental
-
Represents a distributed matrix in blocks of local matrices.
:param blocks: An RDD of sub-matrix blocks
diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py
index 7da921976d..3b1c5519bd 100644
--- a/python/pyspark/mllib/stat/KernelDensity.py
+++ b/python/pyspark/mllib/stat/KernelDensity.py
@@ -28,8 +28,6 @@ from pyspark.rdd import RDD
class KernelDensity(object):
"""
- .. note:: Experimental
-
Estimate probability density at required points given a RDD of samples
from the population.
diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py
index b0a85240b2..67d5f0e44f 100644
--- a/python/pyspark/mllib/stat/_statistics.py
+++ b/python/pyspark/mllib/stat/_statistics.py
@@ -160,8 +160,6 @@ class Statistics(object):
@ignore_unicode_prefix
def chiSqTest(observed, expected=None):
"""
- .. note:: Experimental
-
If `observed` is Vector, conduct Pearson's chi-squared goodness
of fit test of the observed data against the expected distribution,
or against the uniform distribution (by default), with each category
@@ -246,8 +244,6 @@ class Statistics(object):
@ignore_unicode_prefix
def kolmogorovSmirnovTest(data, distName="norm", *params):
"""
- .. note:: Experimental
-
Performs the Kolmogorov-Smirnov (KS) test for data sampled from
a continuous distribution. It tests the null hypothesis that
the data is generated from a particular distribution.
diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py
index 8be76fcefe..b3011d42e5 100644
--- a/python/pyspark/mllib/tree.py
+++ b/python/pyspark/mllib/tree.py
@@ -76,8 +76,6 @@ class TreeEnsembleModel(JavaModelWrapper, JavaSaveable):
class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
- .. note:: Experimental
-
A decision tree model for classification or regression.
.. versionadded:: 1.1.0
@@ -130,8 +128,6 @@ class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader):
class DecisionTree(object):
"""
- .. note:: Experimental
-
Learning algorithm for a decision tree model for classification or
regression.
@@ -283,8 +279,6 @@ class DecisionTree(object):
@inherit_doc
class RandomForestModel(TreeEnsembleModel, JavaLoader):
"""
- .. note:: Experimental
-
Represents a random forest model.
.. versionadded:: 1.2.0
@@ -297,8 +291,6 @@ class RandomForestModel(TreeEnsembleModel, JavaLoader):
class RandomForest(object):
"""
- .. note:: Experimental
-
Learning algorithm for a random forest model for classification or
regression.
@@ -486,8 +478,6 @@ class RandomForest(object):
@inherit_doc
class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader):
"""
- .. note:: Experimental
-
Represents a gradient-boosted tree model.
.. versionadded:: 1.3.0
@@ -500,8 +490,6 @@ class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader):
class GradientBoostedTrees(object):
"""
- .. note:: Experimental
-
Learning algorithm for a gradient boosted trees model for
classification or regression.