diff options
Diffstat (limited to 'python/pyspark/mllib/linalg/__init__.py')
-rw-r--r-- | python/pyspark/mllib/linalg/__init__.py | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 3a345b2b56..15dc53a959 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -39,6 +39,7 @@ else: import numpy as np from pyspark import since +from pyspark.ml import linalg as newlinalg from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \ IntegerType, ByteType, BooleanType @@ -247,6 +248,15 @@ class Vector(object): """ raise NotImplementedError + def asML(self): + """ + Convert this vector to the new mllib-local representation. + This does NOT copy the data; it copies references. + + :return: :py:class:`pyspark.ml.linalg.Vector` + """ + raise NotImplementedError + class DenseVector(Vector): """ @@ -408,6 +418,17 @@ class DenseVector(Vector): """ return self.array + def asML(self): + """ + Convert this vector to the new mllib-local representation. + This does NOT copy the data; it copies references. + + :return: :py:class:`pyspark.ml.linalg.DenseVector` + + .. versionadded:: 2.0.0 + """ + return newlinalg.DenseVector(self.array) + @property def values(self): """ @@ -737,6 +758,17 @@ class SparseVector(Vector): arr[self.indices] = self.values return arr + def asML(self): + """ + Convert this vector to the new mllib-local representation. + This does NOT copy the data; it copies references. + + :return: :py:class:`pyspark.ml.linalg.SparseVector` + + .. versionadded:: 2.0.0 + """ + return newlinalg.SparseVector(self.size, self.indices, self.values) + def __len__(self): return self.size @@ -846,6 +878,24 @@ class Vectors(object): return DenseVector(elements) @staticmethod + def fromML(vec): + """ + Convert a vector from the new mllib-local representation. + This does NOT copy the data; it copies references. + + :param vec: a :py:class:`pyspark.ml.linalg.Vector` + :return: a :py:class:`pyspark.mllib.linalg.Vector` + + .. versionadded:: 2.0.0 + """ + if isinstance(vec, newlinalg.DenseVector): + return DenseVector(vec.array) + elif isinstance(vec, newlinalg.SparseVector): + return SparseVector(vec.size, vec.indices, vec.values) + else: + raise TypeError("Unsupported vector type %s" % type(vec)) + + @staticmethod def stringify(vector): """ Converts a vector into a string, which can be recognized by @@ -945,6 +995,13 @@ class Matrix(object): """ raise NotImplementedError + def asML(self): + """ + Convert this matrix to the new mllib-local representation. + This does NOT copy the data; it copies references. + """ + raise NotImplementedError + @staticmethod def _convert_to_array(array_like, dtype): """ @@ -1044,6 +1101,17 @@ class DenseMatrix(Matrix): return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values) + def asML(self): + """ + Convert this matrix to the new mllib-local representation. + This does NOT copy the data; it copies references. + + :return: :py:class:`pyspark.ml.linalg.DenseMatrix` + + .. versionadded:: 2.0.0 + """ + return newlinalg.DenseMatrix(self.numRows, self.numCols, self.values, self.isTransposed) + def __getitem__(self, indices): i, j = indices if i < 0 or i >= self.numRows: @@ -1216,6 +1284,18 @@ class SparseMatrix(Matrix): densevals = np.ravel(self.toArray(), order='F') return DenseMatrix(self.numRows, self.numCols, densevals) + def asML(self): + """ + Convert this matrix to the new mllib-local representation. + This does NOT copy the data; it copies references. + + :return: :py:class:`pyspark.ml.linalg.SparseMatrix` + + .. versionadded:: 2.0.0 + """ + return newlinalg.SparseMatrix(self.numRows, self.numCols, self.colPtrs, self.rowIndices, + self.values, self.isTransposed) + # TODO: More efficient implementation: def __eq__(self, other): return np.all(self.toArray() == other.toArray()) @@ -1236,6 +1316,25 @@ class Matrices(object): """ return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values) + @staticmethod + def fromML(mat): + """ + Convert a matrix from the new mllib-local representation. + This does NOT copy the data; it copies references. + + :param mat: a :py:class:`pyspark.ml.linalg.Matrix` + :return: a :py:class:`pyspark.mllib.linalg.Matrix` + + .. versionadded:: 2.0.0 + """ + if isinstance(mat, newlinalg.DenseMatrix): + return DenseMatrix(mat.numRows, mat.numCols, mat.values, mat.isTransposed) + elif isinstance(mat, newlinalg.SparseMatrix): + return SparseMatrix(mat.numRows, mat.numCols, mat.colPtrs, mat.rowIndices, + mat.values, mat.isTransposed) + else: + raise TypeError("Unsupported matrix type %s" % type(mat)) + class QRDecomposition(object): """ |