about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/mllib/linalg/__init__.py
diff options
context:
space:
mode:
author: vinodkc <vinod.kc.in@gmail.com> 2015-09-20 22:55:24 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-09-20 22:55:24 -0700
commit: 01440395176bdbb2662480f03b27851cb860f385 (patch)
tree: 58092f4d04a857be581171e75ffe5423b76138d0 /python/pyspark/mllib/linalg/__init__.py
parent: 0c498717ba9622b6c889e701e8eed5ef9215c030 (diff)
download: spark-01440395176bdbb2662480f03b27851cb860f385.tar.gz
spark-01440395176bdbb2662480f03b27851cb860f385.tar.bz2
spark-01440395176bdbb2662480f03b27851cb860f385.zip
[SPARK-10631] [DOCUMENTATION, MLLIB, PYSPARK] Added documentation for few APIs
There are some missing API docs in pyspark.mllib.linalg.Vector (including DenseVector and SparseVector). We should add them based on their Scala counterparts. Author: vinodkc <vinod.kc.in@gmail.com> Closes #8834 from vinodkc/fix_SPARK-10631.
Diffstat (limited to 'python/pyspark/mllib/linalg/__init__.py')
-rw-r--r-- python/pyspark/mllib/linalg/__init__.py 22
1 files changed, 17 insertions, 5 deletions
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 4829acb16e..f929e3e96f 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -301,11 +301,14 @@ class DenseVector(Vector):
return DenseVector, (self.array.tostring(),)
def numNonzeros(self):
+ """
+ Number of nonzero elements. This scans all active values and counts the nonzeros.
+ """
return np.count_nonzero(self.array)
def norm(self, p):
"""
- Calculte the norm of a DenseVector.
+ Calculates the norm of a DenseVector.
>>> a = DenseVector([0, -1, 2, -3])
>>> a.norm(2)
@@ -397,10 +400,16 @@ class DenseVector(Vector):
return np.dot(diff, diff)
def toArray(self):
+ """
+ Returns a numpy.ndarray
+ """
return self.array
@property
def values(self):
+ """
+ Returns the values as a numpy.ndarray (the same array returned by toArray)
+ """
return self.array
def __getitem__(self, item):
@@ -479,8 +488,8 @@ class SparseVector(Vector):
:param size: Size of the vector.
:param args: Active entries, as a dictionary {index: value, ...},
- a list of tuples [(index, value), ...], or a list of strictly i
- ncreasing indices and a list of corresponding values [index, ...],
+ a list of tuples [(index, value), ...], or a list of strictly
+ increasing indices and a list of corresponding values [index, ...],
[value, ...]. Inactive entries are treated as zeros.
>>> SparseVector(4, {1: 1.0, 3: 5.5})
@@ -521,11 +530,14 @@ class SparseVector(Vector):
raise TypeError("indices array must be sorted")
def numNonzeros(self):
+ """
+ Number of nonzero elements. This scans all active values and counts the nonzeros.
+ """
return np.count_nonzero(self.values)
def norm(self, p):
"""
- Calculte the norm of a SparseVector.
+ Calculates the norm of a SparseVector.
>>> a = SparseVector(4, [0, 1], [3., -4.])
>>> a.norm(1)
@@ -797,7 +809,7 @@ class Vectors(object):
values (sorted by index).
:param size: Size of the vector.
- :param args: Non-zero entries, as a dictionary, list of tupes,
+ :param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> Vectors.sparse(4, {1: 1.0, 3: 5.5})