aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib/linalg
diff options
context:
space:
mode:
authorKai Jiang <jiangkai@gmail.com>2016-01-05 15:33:27 -0800
committerJoseph K. Bradley <joseph@databricks.com>2016-01-05 15:33:27 -0800
commit1537e55604cafafa49a8b7f3ce915f9745392bc0 (patch)
tree7cb13626282d792f67ce6951e37286ccfc457730 /python/pyspark/mllib/linalg
parentff89975543b153d0d235c0cac615d45b34aa8fe7 (diff)
downloadspark-1537e55604cafafa49a8b7f3ce915f9745392bc0.tar.gz
spark-1537e55604cafafa49a8b7f3ce915f9745392bc0.tar.bz2
spark-1537e55604cafafa49a8b7f3ce915f9745392bc0.zip
[SPARK-12041][ML][PYSPARK] Add columnSimilarities to IndexedRowMatrix
Add `columnSimilarities` to IndexedRowMatrix for PySpark spark.mllib.linalg. Author: Kai Jiang <jiangkai@gmail.com> Closes #10158 from vectorijk/spark-12041.
Diffstat (limited to 'python/pyspark/mllib/linalg')
-rw-r--r--python/pyspark/mllib/linalg/distributed.py14
1 files changed, 14 insertions, 0 deletions
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index 0e76050788..e1f022187d 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -297,6 +297,20 @@ class IndexedRowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numCols")
+ def columnSimilarities(self):
+ """
+ Compute all cosine similarities between columns.
+
+ >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+ ... IndexedRow(6, [4, 5, 6])])
+ >>> mat = IndexedRowMatrix(rows)
+ >>> cs = mat.columnSimilarities()
+ >>> print(cs.numCols())
+ 3
+ """
+ java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities")
+ return CoordinateMatrix(java_coordinate_matrix)
+
def toRowMatrix(self):
"""
Convert this matrix to a RowMatrix.