about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Xiangrui Meng <meng@databricks.com> 2015-04-01 13:29:04 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-04-01 13:29:11 -0700
commit f50d95a8f0832fc5e68177dafc0b2fc93c8c0d67 (patch)
tree 1ccb7303f75fc730dadec73222f82facdf3c9278
parent 7d029cb1eb6f1df1bce1a3f5784fb7ce2f981a33 (diff)
download spark-f50d95a8f0832fc5e68177dafc0b2fc93c8c0d67.tar.gz
spark-f50d95a8f0832fc5e68177dafc0b2fc93c8c0d67.tar.bz2
spark-f50d95a8f0832fc5e68177dafc0b2fc93c8c0d67.zip
[SPARK-6651][MLLIB] delegate dense vector arithmetics to the underlying numpy array
Users should be able to use numpy operators directly on dense vectors.

davies atalwalkar

Author: Xiangrui Meng <meng@databricks.com>

Closes #5312 from mengxr/SPARK-6651 and squashes the following commits:

e665c5c [Xiangrui Meng] wrap the result in a dense vector
23dfca3 [Xiangrui Meng] delegate dense vector arithmetics to the underlying numpy array

(cherry picked from commit 2275acce7ba5fac83c58554d7ee9f4c7f3e866cf)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
-rw-r--r-- python/pyspark/mllib/linalg.py 38
1 files changed, 37 insertions, 1 deletions
diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py
index f5aad28afd..8b791ff6a7 100644
--- a/python/pyspark/mllib/linalg.py
+++ b/python/pyspark/mllib/linalg.py
@@ -173,7 +173,24 @@ class Vector(object):
class DenseVector(Vector):
"""
- A dense vector represented by a value array.
+ A dense vector represented by a value array. We use numpy array for
+ storage and arithmetics will be delegated to the underlying numpy
+ array.
+
+ >>> v = Vectors.dense([1.0, 2.0])
+ >>> u = Vectors.dense([3.0, 4.0])
+ >>> v + u
+ DenseVector([4.0, 6.0])
+ >>> 2 - v
+ DenseVector([1.0, 0.0])
+ >>> v / 2
+ DenseVector([0.5, 1.0])
+ >>> v * u
+ DenseVector([3.0, 8.0])
+ >>> u / v
+ DenseVector([3.0, 2.0])
+ >>> u % 2
+ DenseVector([1.0, 0.0])
"""
def __init__(self, ar):
if isinstance(ar, basestring):
@@ -292,6 +309,25 @@ class DenseVector(Vector):
def __getattr__(self, item):
return getattr(self.array, item)
+ def _delegate(op):
+ def func(self, other):
+ if isinstance(other, DenseVector):
+ other = other.array
+ return DenseVector(getattr(self.array, op)(other))
+ return func
+
+ __neg__ = _delegate("__neg__")
+ __add__ = _delegate("__add__")
+ __sub__ = _delegate("__sub__")
+ __mul__ = _delegate("__mul__")
+ __div__ = _delegate("__div__")
+ __mod__ = _delegate("__mod__")
+ __radd__ = _delegate("__radd__")
+ __rsub__ = _delegate("__rsub__")
+ __rmul__ = _delegate("__rmul__")
+ __rdiv__ = _delegate("__rdiv__")
+ __rmod__ = _delegate("__rmod__")
+
class SparseVector(Vector):
"""