diff options
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- | python/pyspark/mllib/clustering.py | 6 | ||||
-rw-r--r-- | python/pyspark/mllib/feature.py | 24 | ||||
-rw-r--r-- | python/pyspark/mllib/linalg/__init__.py | 11 | ||||
-rw-r--r-- | python/pyspark/mllib/linalg/distributed.py | 15 | ||||
-rw-r--r-- | python/pyspark/mllib/regression.py | 2 | ||||
-rw-r--r-- | python/pyspark/mllib/stat/_statistics.py | 3 | ||||
-rw-r--r-- | python/pyspark/mllib/tree.py | 12 |
7 files changed, 39 insertions, 34 deletions
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 2036168e45..91123ace33 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -699,9 +699,9 @@ class StreamingKMeansModel(KMeansModel): * n_t+1: New number of weights. * a: Decay Factor, which gives the forgetfulness. - Note that if a is set to 1, it is the weighted mean of the previous - and new data. If it set to zero, the old centroids are completely - forgotten. + .. note:: If a is set to 1, it is the weighted mean of the previous + and new data. If it is set to zero, the old centroids are completely + forgotten. :param clusterCenters: Initial cluster centers. diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 7eaa2282cb..bde0f67be7 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -114,9 +114,9 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer): """ Applies transformation on a vector or an RDD[Vector]. - Note: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. note:: In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. :param vector: Vector or RDD of Vector to be transformed. """ @@ -139,9 +139,9 @@ class StandardScalerModel(JavaVectorTransformer): """ Applies standardization transformation on a vector. - Note: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. note:: In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. :param vector: Vector or RDD of Vector to be standardized. :return: Standardized vector. 
If the variance of a column is @@ -407,7 +407,7 @@ class HashingTF(object): Maps a sequence of terms to their term frequencies using the hashing trick. - Note: the terms must be hashable (can not be dict/set/list...). + .. note:: The terms must be hashable (can not be dict/set/list...). :param numFeatures: number of features (default: 2^20) @@ -469,9 +469,9 @@ class IDFModel(JavaVectorTransformer): the terms which occur in fewer than `minDocFreq` documents will have an entry of 0. - Note: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. note:: In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. :param x: an RDD of term frequency vectors or a term frequency vector @@ -551,7 +551,7 @@ class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader): """ Transforms a word to its vector representation - Note: local use only + .. note:: Local use only :param word: a word :return: vector representation of word(s) @@ -570,7 +570,7 @@ class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader): :param num: number of synonyms to find :return: array of (word, cosineSimilarity) - Note: local use only + .. note:: Local use only """ if not isinstance(word, basestring): word = _convert_to_vector(word) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index d37e715c8d..031f22c020 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -835,11 +835,12 @@ class SparseVector(Vector): class Vectors(object): """ - Factory methods for working with vectors. Note that dense vectors - are simply represented as NumPy array objects, so there is no need - to covert them for use in MLlib. 
For sparse vectors, the factory - methods in this class create an MLlib-compatible type, or users - can pass in SciPy's C{scipy.sparse} column vectors. + Factory methods for working with vectors. + + .. note:: Dense vectors are simply represented as NumPy array objects, + so there is no need to convert them for use in MLlib. For sparse vectors, + the factory methods in this class create an MLlib-compatible type, or users + can pass in SciPy's C{scipy.sparse} column vectors. """ @staticmethod diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index 538cada7d1..600655c912 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -171,8 +171,9 @@ class RowMatrix(DistributedMatrix): def computeCovariance(self): """ Computes the covariance matrix, treating each row as an - observation. Note that this cannot be computed on matrices - with more than 65535 columns. + observation. + + .. note:: This cannot be computed on matrices with more than 65535 columns. >>> rows = sc.parallelize([[1, 2], [2, 1]]) >>> mat = RowMatrix(rows) @@ -185,8 +186,9 @@ class RowMatrix(DistributedMatrix): @since('2.0.0') def computeGramianMatrix(self): """ - Computes the Gramian matrix `A^T A`. Note that this cannot be - computed on matrices with more than 65535 columns. + Computes the Gramian matrix `A^T A`. + + .. note:: This cannot be computed on matrices with more than 65535 columns. >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -458,8 +460,9 @@ class IndexedRowMatrix(DistributedMatrix): @since('2.0.0') def computeGramianMatrix(self): """ - Computes the Gramian matrix `A^T A`. Note that this cannot be - computed on matrices with more than 65535 columns. + Computes the Gramian matrix `A^T A`. + + .. note:: This cannot be computed on matrices with more than 65535 columns. >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... 
IndexedRow(1, [4, 5, 6])]) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 705022934e..1b66f5b510 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -44,7 +44,7 @@ class LabeledPoint(object): Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). - Note: 'label' and 'features' are accessible as class attributes. + .. note:: 'label' and 'features' are accessible as class attributes. .. versionadded:: 1.0.0 """ diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 67d5f0e44f..49b26446db 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -164,7 +164,6 @@ class Statistics(object): of fit test of the observed data against the expected distribution, or against the uniform distribution (by default), with each category having an expected frequency of `1 / len(observed)`. - (Note: `observed` cannot contain negative values) If `observed` is matrix, conduct Pearson's independence test on the input contingency matrix, which cannot contain negative entries or @@ -176,6 +175,8 @@ class Statistics(object): contingency matrix for which the chi-squared statistic is computed. All label and feature values must be categorical. + .. note:: `observed` cannot contain negative values + :param observed: it could be a vector containing the observed categorical counts/relative frequencies, or the contingency matrix (containing either counts or relative frequencies), diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index b3011d42e5..a6089fc8b9 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -40,9 +40,9 @@ class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): Predict values for a single data point or an RDD of points using the model trained. 
- Note: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. note:: In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. """ if isinstance(x, RDD): return self.call("predict", x.map(_convert_to_vector)) @@ -85,9 +85,9 @@ class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ Predict the label of one or more examples. - Note: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. note:: In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. :param x: Data point (feature vector), or an RDD of data points (feature |