diff options
author | vinodkc <vinod.kc.in@gmail.com> | 2015-09-20 22:55:24 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-09-20 22:55:24 -0700 |
commit | 01440395176bdbb2662480f03b27851cb860f385 (patch) | |
tree | 58092f4d04a857be581171e75ffe5423b76138d0 /python/pyspark | |
parent | 0c498717ba9622b6c889e701e8eed5ef9215c030 (diff) | |
download | spark-01440395176bdbb2662480f03b27851cb860f385.tar.gz spark-01440395176bdbb2662480f03b27851cb860f385.tar.bz2 spark-01440395176bdbb2662480f03b27851cb860f385.zip |
[SPARK-10631] [DOCUMENTATION, MLLIB, PYSPARK] Added documentation for few APIs
There are some missing API docs in pyspark.mllib.linalg.Vector (including DenseVector and SparseVector). We should add them based on their Scala counterparts.
Author: vinodkc <vinod.kc.in@gmail.com>
Closes #8834 from vinodkc/fix_SPARK-10631.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/mllib/linalg/__init__.py | 22 |
1 files changed, 17 insertions, 5 deletions
```diff
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 4829acb16e..f929e3e96f 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -301,11 +301,14 @@ class DenseVector(Vector):
         return DenseVector, (self.array.tostring(),)
 
     def numNonzeros(self):
+        """
+        Number of nonzero elements. This scans all active values and count non zeros
+        """
         return np.count_nonzero(self.array)
 
     def norm(self, p):
         """
-        Calculte the norm of a DenseVector.
+        Calculates the norm of a DenseVector.
 
         >>> a = DenseVector([0, -1, 2, -3])
         >>> a.norm(2)
@@ -397,10 +400,16 @@ class DenseVector(Vector):
         return np.dot(diff, diff)
 
     def toArray(self):
+        """
+        Returns an numpy.ndarray
+        """
         return self.array
 
     @property
     def values(self):
+        """
+        Returns a list of values
+        """
         return self.array
 
     def __getitem__(self, item):
@@ -479,8 +488,8 @@ class SparseVector(Vector):
 
         :param size: Size of the vector.
         :param args: Active entries, as a dictionary {index: value, ...},
-          a list of tuples [(index, value), ...], or a list of strictly i
-          ncreasing indices and a list of corresponding values [index, ...],
+          a list of tuples [(index, value), ...], or a list of strictly
+          increasing indices and a list of corresponding values [index, ...],
           [value, ...]. Inactive entries are treated as zeros.
 
         >>> SparseVector(4, {1: 1.0, 3: 5.5})
@@ -521,11 +530,14 @@ class SparseVector(Vector):
             raise TypeError("indices array must be sorted")
 
     def numNonzeros(self):
+        """
+        Number of nonzero elements. This scans all active values and count non zeros.
+        """
         return np.count_nonzero(self.values)
 
     def norm(self, p):
         """
-        Calculte the norm of a SparseVector.
+        Calculates the norm of a SparseVector.
 
         >>> a = SparseVector(4, [0, 1], [3., -4.])
         >>> a.norm(1)
@@ -797,7 +809,7 @@ class Vectors(object):
         values (sorted by index).
 
         :param size: Size of the vector.
-        :param args: Non-zero entries, as a dictionary, list of tupes,
+        :param args: Non-zero entries, as a dictionary, list of tuples,
                      or two sorted lists containing indices and values.
 
         >>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
```