diff options
author | Davies Liu <davies@databricks.com> | 2014-10-28 03:50:22 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2014-10-28 03:50:22 -0700 |
commit | fae095bc7c4097859af522ced77f09cf6be17691 (patch) | |
tree | 8b4be3f716e8312c372105b4cc5132860bed2412 /python/pyspark/mllib/linalg.py | |
parent | 46c63417c1bb1aea07baf9036cc5b8f1c3781bbe (diff) | |
download | spark-fae095bc7c4097859af522ced77f09cf6be17691.tar.gz spark-fae095bc7c4097859af522ced77f09cf6be17691.tar.bz2 spark-fae095bc7c4097859af522ced77f09cf6be17691.zip |
[SPARK-3961] [MLlib] [PySpark] Python API for mllib.feature
Added completed Python API for MLlib.feature
Normalizer
StandardScalerModel
StandardScaler
HashTF
IDFModel
IDF
cc mengxr
Author: Davies Liu <davies@databricks.com>
Author: Davies Liu <davies.liu@gmail.com>
Closes #2819 from davies/feature and squashes the following commits:
4f48f48 [Davies Liu] add a note for HashingTF
67f6d21 [Davies Liu] address comments
b628693 [Davies Liu] rollback changes in Word2Vec
efb4f4f [Davies Liu] Merge branch 'master' into feature
806c7c2 [Davies Liu] address comments
3abb8c2 [Davies Liu] address comments
59781b9 [Davies Liu] Merge branch 'master' of github.com:apache/spark into feature
a405ae7 [Davies Liu] fix tests
7a1891a [Davies Liu] fix tests
486795f [Davies Liu] update programming guide, HashTF -> HashingTF
8a50584 [Davies Liu] Python API for mllib.feature
Diffstat (limited to 'python/pyspark/mllib/linalg.py')
-rw-r--r-- | python/pyspark/mllib/linalg.py | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index 773d8d3938..1b9bf59624 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -111,6 +111,13 @@ def _vector_size(v): raise TypeError("Cannot treat type %s as a vector" % type(v)) +def _format_float(f, digits=4): + s = str(round(f, digits)) + if '.' in s: + s = s[:s.index('.') + 1 + digits] + return s + + class Vector(object): """ Abstract class for DenseVector and SparseVector @@ -228,7 +235,7 @@ class DenseVector(Vector): return "[" + ",".join([str(v) for v in self.array]) + "]" def __repr__(self): - return "DenseVector(%r)" % self.array + return "DenseVector([%s])" % (', '.join(_format_float(i) for i in self.array)) def __eq__(self, other): return isinstance(other, DenseVector) and self.array == other.array @@ -416,7 +423,7 @@ class SparseVector(Vector): Returns a copy of this SparseVector as a 1-dimensional NumPy array. """ arr = np.zeros((self.size,), dtype=np.float64) - for i in xrange(self.indices.size): + for i in xrange(len(self.indices)): arr[self.indices[i]] = self.values[i] return arr @@ -431,7 +438,8 @@ class SparseVector(Vector): def __repr__(self): inds = self.indices vals = self.values - entries = ", ".join(["{0}: {1}".format(inds[i], vals[i]) for i in xrange(len(inds))]) + entries = ", ".join(["{0}: {1}".format(inds[i], _format_float(vals[i])) + for i in xrange(len(inds))]) return "SparseVector({0}, {{{1}}})".format(self.size, entries) def __eq__(self, other): @@ -491,7 +499,7 @@ class Vectors(object): returns a NumPy array. >>> Vectors.dense([1, 2, 3]) - DenseVector(array('d', [1.0, 2.0, 3.0])) + DenseVector([1.0, 2.0, 3.0]) """ return DenseVector(elements) |