From 21b2605dc4900894ea7a911e039781ecc2a18c14 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Fri, 27 May 2016 14:02:39 -0700 Subject: [SPARK-15413][ML][MLLIB] Change `toBreeze` to `asBreeze` in Vector and Matrix ## What changes were proposed in this pull request? We're using `asML` to convert the mllib vector/matrix to ml vector/matrix now. Using `as` is more correct given that this conversion actually shares the same underline data structure. As a result, in this PR, `toBreeze` will be changed to `asBreeze`. This is a private API, as a result, it will not affect any user's application. ## How was this patch tested? unit tests Author: DB Tsai Closes #13198 from dbtsai/minor. --- .../main/scala/org/apache/spark/ml/linalg/Matrices.scala | 16 ++++++++-------- .../main/scala/org/apache/spark/ml/linalg/Vectors.scala | 8 ++++---- .../ml/stat/distribution/MultivariateGaussian.scala | 8 ++++---- .../spark/ml/linalg/BreezeMatrixConversionSuite.scala | 4 ++-- .../spark/ml/linalg/BreezeVectorConversionSuite.scala | 4 ++-- .../scala/org/apache/spark/ml/linalg/MatricesSuite.scala | 14 +++++++------- .../scala/org/apache/spark/ml/linalg/VectorsSuite.scala | 2 +- 7 files changed, 28 insertions(+), 28 deletions(-) (limited to 'mllib-local/src') diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala index a47526d36f..0ea687bbcc 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala @@ -69,7 +69,7 @@ sealed trait Matrix extends Serializable { def rowIter: Iterator[Vector] = this.transpose.colIter /** Converts to a breeze matrix. */ - private[ml] def toBreeze: BM[Double] + private[ml] def asBreeze: BM[Double] /** Gets the (i, j)-th element. */ @Since("2.0.0") @@ -112,11 +112,11 @@ sealed trait Matrix extends Serializable { } /** A human readable representation of the matrix */ - override def toString: String = toBreeze.toString() + override def toString: String = asBreeze.toString() /** A human readable representation of the matrix with maximum lines and width */ @Since("2.0.0") - def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth) + def toString(maxLines: Int, maxLineWidth: Int): String = asBreeze.toString(maxLines, maxLineWidth) /** * Map the values of this matrix using a function. Generates a new matrix. Performs the @@ -202,7 +202,7 @@ class DenseMatrix @Since("2.0.0") ( this(numRows, numCols, values, false) override def equals(o: Any): Boolean = o match { - case m: Matrix => toBreeze == m.toBreeze + case m: Matrix => asBreeze == m.asBreeze case _ => false } @@ -210,7 +210,7 @@ class DenseMatrix @Since("2.0.0") ( Seq(numRows, numCols, toArray).## } - private[ml] def toBreeze: BM[Double] = { + private[ml] def asBreeze: BM[Double] = { if (!isTransposed) { new BDM[Double](numRows, numCols, values) } else { @@ -488,14 +488,14 @@ class SparseMatrix @Since("2.0.0") ( rowIndices: Array[Int], values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false) - override def hashCode(): Int = toBreeze.hashCode() + override def hashCode(): Int = asBreeze.hashCode() override def equals(o: Any): Boolean = o match { - case m: Matrix => toBreeze == m.toBreeze + case m: Matrix => asBreeze == m.asBreeze case _ => false } - private[ml] def toBreeze: BM[Double] = { + private[ml] def asBreeze: BM[Double] = { if (!isTransposed) { new BSM[Double](values, numRows, numCols, colPtrs, rowIndices) } else { diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 59f9c2adba..909fec1c06 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -92,14 +92,14 @@ sealed trait Vector extends Serializable { /** * Converts the instance to a breeze vector. */ - private[spark] def toBreeze: BV[Double] + private[spark] def asBreeze: BV[Double] /** * Gets the value of the ith element. * @param i index */ @Since("2.0.0") - def apply(i: Int): Double = toBreeze(i) + def apply(i: Int): Double = asBreeze(i) /** * Makes a deep copy of this vector. @@ -453,7 +453,7 @@ class DenseVector @Since("2.0.0") ( @Since("2.0.0") val values: Array[Double]) e override def toArray: Array[Double] = values - private[spark] override def toBreeze: BV[Double] = new BDV[Double](values) + private[spark] override def asBreeze: BV[Double] = new BDV[Double](values) override def apply(i: Int): Double = values(i) @@ -584,7 +584,7 @@ class SparseVector @Since("2.0.0") ( new SparseVector(size, indices.clone(), values.clone()) } - private[spark] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size) + private[spark] override def asBreeze: BV[Double] = new BSV[Double](indices, values, size) override def foreachActive(f: (Int, Double) => Unit): Unit = { var i = 0 diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala b/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala index 383d6d96e8..0be28677ef 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala @@ -47,7 +47,7 @@ class MultivariateGaussian @Since("2.0.0") ( this(Vectors.fromBreeze(mean), Matrices.fromBreeze(cov)) } - private val breezeMu = mean.toBreeze.toDenseVector + private val breezeMu = mean.asBreeze.toDenseVector /** * Compute distribution dependent constants: @@ -61,7 +61,7 @@ class MultivariateGaussian @Since("2.0.0") ( */ @Since("2.0.0") def pdf(x: Vector): Double = { - pdf(x.toBreeze) + pdf(x.asBreeze) } /** @@ -69,7 +69,7 @@ class MultivariateGaussian @Since("2.0.0") ( */ @Since("2.0.0") def logpdf(x: Vector): Double = { - logpdf(x.toBreeze) + logpdf(x.asBreeze) } /** Returns density of this multivariate Gaussian at given point, x */ @@ -113,7 +113,7 @@ class MultivariateGaussian @Since("2.0.0") ( * relation to the maximum singular value (same tolerance used by, e.g., Octave). */ private def calculateCovarianceConstants: (BDM[Double], Double) = { - val eigSym.EigSym(d, u) = eigSym(cov.toBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t + val eigSym.EigSym(d, u) = eigSym(cov.asBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t // For numerical stability, values are considered to be non-zero only if they exceed tol. // This prevents any inverted value from exceeding (eps * n * max(d))^-1 diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala index 70a21e41bf..f07ed20cf0 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.ml.SparkMLFunSuite class BreezeMatrixConversionSuite extends SparkMLFunSuite { test("dense matrix to breeze") { val mat = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) - val breeze = mat.toBreeze.asInstanceOf[BDM[Double]] + val breeze = mat.asBreeze.asInstanceOf[BDM[Double]] assert(breeze.rows === mat.numRows) assert(breeze.cols === mat.numCols) assert(breeze.data.eq(mat.asInstanceOf[DenseMatrix].values), "should not copy data") @@ -48,7 +48,7 @@ class BreezeMatrixConversionSuite extends SparkMLFunSuite { val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 1, 2) val mat = Matrices.sparse(3, 2, colPtrs, rowIndices, values) - val breeze = mat.toBreeze.asInstanceOf[BSM[Double]] + val breeze = mat.asBreeze.asInstanceOf[BSM[Double]] assert(breeze.rows === mat.numRows) assert(breeze.cols === mat.numCols) assert(breeze.data.eq(mat.asInstanceOf[SparseMatrix].values), "should not copy data") diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala index 00c9ee79eb..4c9740b6bc 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala @@ -33,12 +33,12 @@ class BreezeVectorConversionSuite extends SparkMLFunSuite { test("dense to breeze") { val vec = Vectors.dense(arr) - assert(vec.toBreeze === new BDV[Double](arr)) + assert(vec.asBreeze === new BDV[Double](arr)) } test("sparse to breeze") { val vec = Vectors.sparse(n, indices, values) - assert(vec.toBreeze === new BSV[Double](indices, values, n)) + assert(vec.asBreeze === new BSV[Double](indices, values, n)) } test("dense breeze to vector") { diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala index 5c69c5ed7b..2796fcf2cb 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala @@ -61,7 +61,7 @@ class MatricesSuite extends SparkMLFunSuite { (1, 2, 2.0), (2, 2, 2.0), (1, 2, 2.0), (0, 0, 0.0)) val mat2 = SparseMatrix.fromCOO(m, n, entries) - assert(mat.toBreeze === mat2.toBreeze) + assert(mat.asBreeze === mat2.asBreeze) assert(mat2.values.length == 4) } @@ -174,8 +174,8 @@ class MatricesSuite extends SparkMLFunSuite { val spMat2 = deMat1.toSparse val deMat2 = spMat1.toDense - assert(spMat1.toBreeze === spMat2.toBreeze) - assert(deMat1.toBreeze === deMat2.toBreeze) + assert(spMat1.asBreeze === spMat2.asBreeze) + assert(deMat1.asBreeze === deMat2.asBreeze) } test("map, update") { @@ -209,8 +209,8 @@ class MatricesSuite extends SparkMLFunSuite { val sATexpected = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - assert(dAT.toBreeze === dATexpected.toBreeze) - assert(sAT.toBreeze === sATexpected.toBreeze) + assert(dAT.asBreeze === dATexpected.asBreeze) + assert(sAT.asBreeze === sATexpected.asBreeze) assert(dA(1, 0) === dAT(0, 1)) assert(dA(2, 1) === dAT(1, 2)) assert(sA(1, 0) === sAT(0, 1)) @@ -219,8 +219,8 @@ class MatricesSuite extends SparkMLFunSuite { assert(!dA.toArray.eq(dAT.toArray), "has to have a new array") assert(dA.values.eq(dAT.transpose.asInstanceOf[DenseMatrix].values), "should not copy array") - assert(dAT.toSparse.toBreeze === sATexpected.toBreeze) - assert(sAT.toDense.toBreeze === dATexpected.toBreeze) + assert(dAT.toSparse.asBreeze === sATexpected.asBreeze) + assert(sAT.toDense.asBreeze === dATexpected.asBreeze) } test("foreachActive") { diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala index 887814b5e7..614be460a4 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala @@ -230,7 +230,7 @@ class VectorsSuite extends SparkMLFunSuite { val denseVector1 = Vectors.dense(sparseVector1.toArray) val denseVector2 = Vectors.dense(sparseVector2.toArray) - val squaredDist = breezeSquaredDistance(sparseVector1.toBreeze, sparseVector2.toBreeze) + val squaredDist = breezeSquaredDistance(sparseVector1.asBreeze, sparseVector2.asBreeze) // SparseVector vs. SparseVector assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8) -- cgit v1.2.3