aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib/linalg/__init__.py
diff options
context:
space:
mode:
authorNick Pentreath <nickp@za.ibm.com>2016-06-30 17:52:15 -0700
committerJoseph K. Bradley <joseph@databricks.com>2016-06-30 17:52:15 -0700
commitdab10516138867b7c4fc6d42168497e82853b539 (patch)
treeed85182e87f912dcad810d9564cc7db47a7b2727 /python/pyspark/mllib/linalg/__init__.py
parent85f2303ecadd9bf6d9694a2743dda075654c5ccf (diff)
downloadspark-dab10516138867b7c4fc6d42168497e82853b539.tar.gz
spark-dab10516138867b7c4fc6d42168497e82853b539.tar.bz2
spark-dab10516138867b7c4fc6d42168497e82853b539.zip
[SPARK-16328][ML][MLLIB][PYSPARK] Add 'asML' and 'fromML' conversion methods to PySpark linalg
The move to `ml.linalg` created `asML`/`fromML` utility methods in Scala/Java for converting between the `mllib.linalg` and `ml.linalg` representations. These are missing in Python; this PR adds them. ## How was this patch tested? New doctests. Author: Nick Pentreath <nickp@za.ibm.com> Closes #13997 from MLnick/SPARK-16328-python-linalg-convert.
Diffstat (limited to 'python/pyspark/mllib/linalg/__init__.py')
-rw-r--r--python/pyspark/mllib/linalg/__init__.py99
1 files changed, 99 insertions, 0 deletions
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 3a345b2b56..15dc53a959 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -39,6 +39,7 @@ else:
import numpy as np
from pyspark import since
+from pyspark.ml import linalg as newlinalg
from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \
IntegerType, ByteType, BooleanType
@@ -247,6 +248,15 @@ class Vector(object):
"""
raise NotImplementedError
+ def asML(self):
+ """
+ Convert this vector to the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+
+ :return: :py:class:`pyspark.ml.linalg.Vector`
+ """
+ raise NotImplementedError
+
class DenseVector(Vector):
"""
@@ -408,6 +418,17 @@ class DenseVector(Vector):
"""
return self.array
+ def asML(self):
+ """
+ Convert this vector to the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+
+ :return: :py:class:`pyspark.ml.linalg.DenseVector`
+
+ .. versionadded:: 2.0.0
+ """
+ return newlinalg.DenseVector(self.array)
+
@property
def values(self):
"""
@@ -737,6 +758,17 @@ class SparseVector(Vector):
arr[self.indices] = self.values
return arr
+ def asML(self):
+ """
+ Convert this vector to the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+
+ :return: :py:class:`pyspark.ml.linalg.SparseVector`
+
+ .. versionadded:: 2.0.0
+ """
+ return newlinalg.SparseVector(self.size, self.indices, self.values)
+
def __len__(self):
return self.size
@@ -846,6 +878,24 @@ class Vectors(object):
return DenseVector(elements)
@staticmethod
+ def fromML(vec):
+ """
+ Convert a vector from the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+
+ :param vec: a :py:class:`pyspark.ml.linalg.Vector`
+ :return: a :py:class:`pyspark.mllib.linalg.Vector`
+
+ .. versionadded:: 2.0.0
+ """
+ if isinstance(vec, newlinalg.DenseVector):
+ return DenseVector(vec.array)
+ elif isinstance(vec, newlinalg.SparseVector):
+ return SparseVector(vec.size, vec.indices, vec.values)
+ else:
+ raise TypeError("Unsupported vector type %s" % type(vec))
+
+ @staticmethod
def stringify(vector):
"""
Converts a vector into a string, which can be recognized by
@@ -945,6 +995,13 @@ class Matrix(object):
"""
raise NotImplementedError
+ def asML(self):
+ """
+ Convert this matrix to the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+ """
+ raise NotImplementedError
+
@staticmethod
def _convert_to_array(array_like, dtype):
"""
@@ -1044,6 +1101,17 @@ class DenseMatrix(Matrix):
return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values)
+ def asML(self):
+ """
+ Convert this matrix to the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+
+ :return: :py:class:`pyspark.ml.linalg.DenseMatrix`
+
+ .. versionadded:: 2.0.0
+ """
+ return newlinalg.DenseMatrix(self.numRows, self.numCols, self.values, self.isTransposed)
+
def __getitem__(self, indices):
i, j = indices
if i < 0 or i >= self.numRows:
@@ -1216,6 +1284,18 @@ class SparseMatrix(Matrix):
densevals = np.ravel(self.toArray(), order='F')
return DenseMatrix(self.numRows, self.numCols, densevals)
+ def asML(self):
+ """
+ Convert this matrix to the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+
+ :return: :py:class:`pyspark.ml.linalg.SparseMatrix`
+
+ .. versionadded:: 2.0.0
+ """
+ return newlinalg.SparseMatrix(self.numRows, self.numCols, self.colPtrs, self.rowIndices,
+ self.values, self.isTransposed)
+
# TODO: More efficient implementation:
def __eq__(self, other):
return np.all(self.toArray() == other.toArray())
@@ -1236,6 +1316,25 @@ class Matrices(object):
"""
return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)
+ @staticmethod
+ def fromML(mat):
+ """
+ Convert a matrix from the new mllib-local representation.
+ This does NOT copy the data; it copies references.
+
+ :param mat: a :py:class:`pyspark.ml.linalg.Matrix`
+ :return: a :py:class:`pyspark.mllib.linalg.Matrix`
+
+ .. versionadded:: 2.0.0
+ """
+ if isinstance(mat, newlinalg.DenseMatrix):
+ return DenseMatrix(mat.numRows, mat.numCols, mat.values, mat.isTransposed)
+ elif isinstance(mat, newlinalg.SparseMatrix):
+ return SparseMatrix(mat.numRows, mat.numCols, mat.colPtrs, mat.rowIndices,
+ mat.values, mat.isTransposed)
+ else:
+ raise TypeError("Unsupported matrix type %s" % type(mat))
+
class QRDecomposition(object):
"""