about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/mllib/linalg.py
diff options
context:
space:
mode:
author: Davies Liu <davies@databricks.com> 2014-10-28 03:50:22 -0700
committer: Xiangrui Meng <meng@databricks.com> 2014-10-28 03:50:22 -0700
commit: fae095bc7c4097859af522ced77f09cf6be17691 (patch)
tree: 8b4be3f716e8312c372105b4cc5132860bed2412 /python/pyspark/mllib/linalg.py
parent: 46c63417c1bb1aea07baf9036cc5b8f1c3781bbe (diff)
download: spark-fae095bc7c4097859af522ced77f09cf6be17691.tar.gz
spark-fae095bc7c4097859af522ced77f09cf6be17691.tar.bz2
spark-fae095bc7c4097859af522ced77f09cf6be17691.zip
[SPARK-3961] [MLlib] [PySpark] Python API for mllib.feature
Added completed Python API for MLlib.feature

Normalizer
StandardScalerModel
StandardScaler
HashTF
IDFModel
IDF

cc mengxr

Author: Davies Liu <davies@databricks.com>
Author: Davies Liu <davies.liu@gmail.com>

Closes #2819 from davies/feature and squashes the following commits:

4f48f48 [Davies Liu] add a note for HashingTF
67f6d21 [Davies Liu] address comments
b628693 [Davies Liu] rollback changes in Word2Vec
efb4f4f [Davies Liu] Merge branch 'master' into feature
806c7c2 [Davies Liu] address comments
3abb8c2 [Davies Liu] address comments
59781b9 [Davies Liu] Merge branch 'master' of github.com:apache/spark into feature
a405ae7 [Davies Liu] fix tests
7a1891a [Davies Liu] fix tests
486795f [Davies Liu] update programming guide, HashTF -> HashingTF
8a50584 [Davies Liu] Python API for mllib.feature
Diffstat (limited to 'python/pyspark/mllib/linalg.py')
-rw-r--r-- python/pyspark/mllib/linalg.py 16
1 file changed, 12 insertions, 4 deletions
diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py
index 773d8d3938..1b9bf59624 100644
--- a/python/pyspark/mllib/linalg.py
+++ b/python/pyspark/mllib/linalg.py
@@ -111,6 +111,13 @@ def _vector_size(v):
raise TypeError("Cannot treat type %s as a vector" % type(v))
+def _format_float(f, digits=4):
+ s = str(round(f, digits))
+ if '.' in s:
+ s = s[:s.index('.') + 1 + digits]
+ return s
+
+
class Vector(object):
"""
Abstract class for DenseVector and SparseVector
@@ -228,7 +235,7 @@ class DenseVector(Vector):
return "[" + ",".join([str(v) for v in self.array]) + "]"
def __repr__(self):
- return "DenseVector(%r)" % self.array
+ return "DenseVector([%s])" % (', '.join(_format_float(i) for i in self.array))
def __eq__(self, other):
return isinstance(other, DenseVector) and self.array == other.array
@@ -416,7 +423,7 @@ class SparseVector(Vector):
Returns a copy of this SparseVector as a 1-dimensional NumPy array.
"""
arr = np.zeros((self.size,), dtype=np.float64)
- for i in xrange(self.indices.size):
+ for i in xrange(len(self.indices)):
arr[self.indices[i]] = self.values[i]
return arr
@@ -431,7 +438,8 @@ class SparseVector(Vector):
def __repr__(self):
inds = self.indices
vals = self.values
- entries = ", ".join(["{0}: {1}".format(inds[i], vals[i]) for i in xrange(len(inds))])
+ entries = ", ".join(["{0}: {1}".format(inds[i], _format_float(vals[i]))
+ for i in xrange(len(inds))])
return "SparseVector({0}, {{{1}}})".format(self.size, entries)
def __eq__(self, other):
@@ -491,7 +499,7 @@ class Vectors(object):
returns a NumPy array.
>>> Vectors.dense([1, 2, 3])
- DenseVector(array('d', [1.0, 2.0, 3.0]))
+ DenseVector([1.0, 2.0, 3.0])
"""
return DenseVector(elements)