diff options
author | Kai Jiang <jiangkai@gmail.com> | 2016-01-05 15:33:27 -0800 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2016-01-05 15:33:27 -0800 |
commit | 1537e55604cafafa49a8b7f3ce915f9745392bc0 (patch) | |
tree | 7cb13626282d792f67ce6951e37286ccfc457730 /python | |
parent | ff89975543b153d0d235c0cac615d45b34aa8fe7 (diff) | |
download | spark-1537e55604cafafa49a8b7f3ce915f9745392bc0.tar.gz spark-1537e55604cafafa49a8b7f3ce915f9745392bc0.tar.bz2 spark-1537e55604cafafa49a8b7f3ce915f9745392bc0.zip |
[SPARK-12041][ML][PYSPARK] Add columnSimilarities to IndexedRowMatrix
Add `columnSimilarities` to IndexedRowMatrix for PySpark spark.mllib.linalg.
Author: Kai Jiang <jiangkai@gmail.com>
Closes #10158 from vectorijk/spark-12041.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/mllib/linalg/distributed.py | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index 0e76050788..e1f022187d 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -297,6 +297,20 @@ class IndexedRowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numCols")
 
+    def columnSimilarities(self):
+        """
+        Compute all cosine similarities between columns.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(6, [4, 5, 6])])
+        >>> mat = IndexedRowMatrix(rows)
+        >>> cs = mat.columnSimilarities()
+        >>> print(cs.numCols())
+        3
+        """
+        java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities")
+        return CoordinateMatrix(java_coordinate_matrix)
+
     def toRowMatrix(self):
         """
         Convert this matrix to a RowMatrix.