From 21b2605dc4900894ea7a911e039781ecc2a18c14 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Fri, 27 May 2016 14:02:39 -0700 Subject: [SPARK-15413][ML][MLLIB] Change `toBreeze` to `asBreeze` in Vector and Matrix ## What changes were proposed in this pull request? We're using `asML` to convert the mllib vector/matrix to ml vector/matrix now. Using `as` is more correct given that this conversion actually shares the same underline data structure. As a result, in this PR, `toBreeze` will be changed to `asBreeze`. This is a private API, as a result, it will not affect any user's application. ## How was this patch tested? unit tests Author: DB Tsai Closes #13198 from dbtsai/minor. --- .../org/apache/spark/ml/linalg/Matrices.scala | 16 ++++++------- .../scala/org/apache/spark/ml/linalg/Vectors.scala | 8 +++---- .../stat/distribution/MultivariateGaussian.scala | 8 +++---- .../ml/linalg/BreezeMatrixConversionSuite.scala | 4 ++-- .../ml/linalg/BreezeVectorConversionSuite.scala | 4 ++-- .../org/apache/spark/ml/linalg/MatricesSuite.scala | 14 ++++++------ .../org/apache/spark/ml/linalg/VectorsSuite.scala | 2 +- .../main/scala/org/apache/spark/ml/ann/Layer.scala | 8 +++---- .../ml/classification/LogisticRegression.scala | 2 +- .../spark/ml/clustering/GaussianMixture.scala | 2 +- .../org/apache/spark/ml/feature/MaxAbsScaler.scala | 2 +- .../org/apache/spark/ml/feature/MinMaxScaler.scala | 2 +- .../ml/regression/AFTSurvivalRegression.scala | 2 +- .../spark/ml/regression/LinearRegression.scala | 2 +- .../apache/spark/mllib/classification/SVM.scala | 2 +- .../spark/mllib/clustering/GaussianMixture.scala | 2 +- .../mllib/clustering/GaussianMixtureModel.scala | 4 ++-- .../apache/spark/mllib/clustering/LDAModel.scala | 26 +++++++++++----------- .../spark/mllib/clustering/LDAOptimizer.scala | 6 ++--- .../spark/mllib/clustering/StreamingKMeans.scala | 4 ++-- .../org/apache/spark/mllib/linalg/Matrices.scala | 16 ++++++------- .../org/apache/spark/mllib/linalg/Vectors.scala | 8 +++---- .../mllib/linalg/distributed/BlockMatrix.scala | 8 +++---- .../spark/mllib/linalg/distributed/RowMatrix.scala | 16 ++++++------- .../spark/mllib/optimization/GradientDescent.scala | 4 ++-- .../apache/spark/mllib/optimization/LBFGS.scala | 4 ++-- .../apache/spark/mllib/optimization/Updater.scala | 14 ++++++------ .../org/apache/spark/mllib/regression/Lasso.scala | 2 +- .../spark/mllib/regression/LinearRegression.scala | 2 +- .../spark/mllib/regression/RidgeRegression.scala | 2 +- .../stat/correlation/PearsonCorrelation.scala | 2 +- .../stat/distribution/MultivariateGaussian.scala | 8 +++---- .../apache/spark/mllib/stat/test/ChiSqTest.scala | 2 +- .../spark/ml/classification/NaiveBayesSuite.scala | 6 ++--- .../classification/LogisticRegressionSuite.scala | 4 ++-- .../mllib/classification/NaiveBayesSuite.scala | 4 ++-- .../apache/spark/mllib/clustering/LDASuite.scala | 4 ++-- .../mllib/clustering/StreamingKMeansSuite.scala | 2 +- .../spark/mllib/feature/NormalizerSuite.scala | 16 ++++++------- .../mllib/linalg/BreezeMatrixConversionSuite.scala | 4 ++-- .../mllib/linalg/BreezeVectorConversionSuite.scala | 4 ++-- .../apache/spark/mllib/linalg/MatricesSuite.scala | 14 ++++++------ .../apache/spark/mllib/linalg/VectorsSuite.scala | 2 +- .../linalg/distributed/BlockMatrixSuite.scala | 2 +- .../linalg/distributed/IndexedRowMatrixSuite.scala | 10 ++++----- .../mllib/linalg/distributed/RowMatrixSuite.scala | 14 ++++++------ .../apache/spark/mllib/stat/CorrelationSuite.scala | 6 ++--- .../org/apache/spark/mllib/util/MLUtilsSuite.scala | 6 ++--- project/MimaExcludes.scala | 3 +++ 49 files changed, 156 insertions(+), 153 deletions(-) diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala index a47526d36f..0ea687bbcc 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala @@ -69,7 +69,7 @@ sealed trait Matrix extends Serializable { def rowIter: Iterator[Vector] = this.transpose.colIter /** Converts to a breeze matrix. */ - private[ml] def toBreeze: BM[Double] + private[ml] def asBreeze: BM[Double] /** Gets the (i, j)-th element. */ @Since("2.0.0") @@ -112,11 +112,11 @@ sealed trait Matrix extends Serializable { } /** A human readable representation of the matrix */ - override def toString: String = toBreeze.toString() + override def toString: String = asBreeze.toString() /** A human readable representation of the matrix with maximum lines and width */ @Since("2.0.0") - def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth) + def toString(maxLines: Int, maxLineWidth: Int): String = asBreeze.toString(maxLines, maxLineWidth) /** * Map the values of this matrix using a function. Generates a new matrix. Performs the @@ -202,7 +202,7 @@ class DenseMatrix @Since("2.0.0") ( this(numRows, numCols, values, false) override def equals(o: Any): Boolean = o match { - case m: Matrix => toBreeze == m.toBreeze + case m: Matrix => asBreeze == m.asBreeze case _ => false } @@ -210,7 +210,7 @@ class DenseMatrix @Since("2.0.0") ( Seq(numRows, numCols, toArray).## } - private[ml] def toBreeze: BM[Double] = { + private[ml] def asBreeze: BM[Double] = { if (!isTransposed) { new BDM[Double](numRows, numCols, values) } else { @@ -488,14 +488,14 @@ class SparseMatrix @Since("2.0.0") ( rowIndices: Array[Int], values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false) - override def hashCode(): Int = toBreeze.hashCode() + override def hashCode(): Int = asBreeze.hashCode() override def equals(o: Any): Boolean = o match { - case m: Matrix => toBreeze == m.toBreeze + case m: Matrix => asBreeze == m.asBreeze case _ => false } - private[ml] def toBreeze: BM[Double] = { + private[ml] def asBreeze: BM[Double] = { if (!isTransposed) { new BSM[Double](values, numRows, numCols, colPtrs, rowIndices) } else { diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 59f9c2adba..909fec1c06 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -92,14 +92,14 @@ sealed trait Vector extends Serializable { /** * Converts the instance to a breeze vector. */ - private[spark] def toBreeze: BV[Double] + private[spark] def asBreeze: BV[Double] /** * Gets the value of the ith element. * @param i index */ @Since("2.0.0") - def apply(i: Int): Double = toBreeze(i) + def apply(i: Int): Double = asBreeze(i) /** * Makes a deep copy of this vector. @@ -453,7 +453,7 @@ class DenseVector @Since("2.0.0") ( @Since("2.0.0") val values: Array[Double]) e override def toArray: Array[Double] = values - private[spark] override def toBreeze: BV[Double] = new BDV[Double](values) + private[spark] override def asBreeze: BV[Double] = new BDV[Double](values) override def apply(i: Int): Double = values(i) @@ -584,7 +584,7 @@ class SparseVector @Since("2.0.0") ( new SparseVector(size, indices.clone(), values.clone()) } - private[spark] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size) + private[spark] override def asBreeze: BV[Double] = new BSV[Double](indices, values, size) override def foreachActive(f: (Int, Double) => Unit): Unit = { var i = 0 diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala b/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala index 383d6d96e8..0be28677ef 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala @@ -47,7 +47,7 @@ class MultivariateGaussian @Since("2.0.0") ( this(Vectors.fromBreeze(mean), Matrices.fromBreeze(cov)) } - private val breezeMu = mean.toBreeze.toDenseVector + private val breezeMu = mean.asBreeze.toDenseVector /** * Compute distribution dependent constants: @@ -61,7 +61,7 @@ class MultivariateGaussian @Since("2.0.0") ( */ @Since("2.0.0") def pdf(x: Vector): Double = { - pdf(x.toBreeze) + pdf(x.asBreeze) } /** @@ -69,7 +69,7 @@ class MultivariateGaussian @Since("2.0.0") ( */ @Since("2.0.0") def logpdf(x: Vector): Double = { - logpdf(x.toBreeze) + logpdf(x.asBreeze) } /** Returns density of this multivariate Gaussian at given point, x */ @@ -113,7 +113,7 @@ class MultivariateGaussian @Since("2.0.0") ( * relation to the maximum singular value (same tolerance used by, e.g., Octave). */ private def calculateCovarianceConstants: (BDM[Double], Double) = { - val eigSym.EigSym(d, u) = eigSym(cov.toBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t + val eigSym.EigSym(d, u) = eigSym(cov.asBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t // For numerical stability, values are considered to be non-zero only if they exceed tol. // This prevents any inverted value from exceeding (eps * n * max(d))^-1 diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala index 70a21e41bf..f07ed20cf0 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.ml.SparkMLFunSuite class BreezeMatrixConversionSuite extends SparkMLFunSuite { test("dense matrix to breeze") { val mat = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) - val breeze = mat.toBreeze.asInstanceOf[BDM[Double]] + val breeze = mat.asBreeze.asInstanceOf[BDM[Double]] assert(breeze.rows === mat.numRows) assert(breeze.cols === mat.numCols) assert(breeze.data.eq(mat.asInstanceOf[DenseMatrix].values), "should not copy data") @@ -48,7 +48,7 @@ class BreezeMatrixConversionSuite extends SparkMLFunSuite { val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 1, 2) val mat = Matrices.sparse(3, 2, colPtrs, rowIndices, values) - val breeze = mat.toBreeze.asInstanceOf[BSM[Double]] + val breeze = mat.asBreeze.asInstanceOf[BSM[Double]] assert(breeze.rows === mat.numRows) assert(breeze.cols === mat.numCols) assert(breeze.data.eq(mat.asInstanceOf[SparseMatrix].values), "should not copy data") diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala index 00c9ee79eb..4c9740b6bc 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala @@ -33,12 +33,12 @@ class BreezeVectorConversionSuite extends SparkMLFunSuite { test("dense to breeze") { val vec = Vectors.dense(arr) - assert(vec.toBreeze === new BDV[Double](arr)) + assert(vec.asBreeze === new BDV[Double](arr)) } test("sparse to breeze") { val vec = Vectors.sparse(n, indices, values) - assert(vec.toBreeze === new BSV[Double](indices, values, n)) + assert(vec.asBreeze === new BSV[Double](indices, values, n)) } test("dense breeze to vector") { diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala index 5c69c5ed7b..2796fcf2cb 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala @@ -61,7 +61,7 @@ class MatricesSuite extends SparkMLFunSuite { (1, 2, 2.0), (2, 2, 2.0), (1, 2, 2.0), (0, 0, 0.0)) val mat2 = SparseMatrix.fromCOO(m, n, entries) - assert(mat.toBreeze === mat2.toBreeze) + assert(mat.asBreeze === mat2.asBreeze) assert(mat2.values.length == 4) } @@ -174,8 +174,8 @@ class MatricesSuite extends SparkMLFunSuite { val spMat2 = deMat1.toSparse val deMat2 = spMat1.toDense - assert(spMat1.toBreeze === spMat2.toBreeze) - assert(deMat1.toBreeze === deMat2.toBreeze) + assert(spMat1.asBreeze === spMat2.asBreeze) + assert(deMat1.asBreeze === deMat2.asBreeze) } test("map, update") { @@ -209,8 +209,8 @@ class MatricesSuite extends SparkMLFunSuite { val sATexpected = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - assert(dAT.toBreeze === dATexpected.toBreeze) - assert(sAT.toBreeze === sATexpected.toBreeze) + assert(dAT.asBreeze === dATexpected.asBreeze) + assert(sAT.asBreeze === sATexpected.asBreeze) assert(dA(1, 0) === dAT(0, 1)) assert(dA(2, 1) === dAT(1, 2)) assert(sA(1, 0) === sAT(0, 1)) @@ -219,8 +219,8 @@ class MatricesSuite extends SparkMLFunSuite { assert(!dA.toArray.eq(dAT.toArray), "has to have a new array") assert(dA.values.eq(dAT.transpose.asInstanceOf[DenseMatrix].values), "should not copy array") - assert(dAT.toSparse.toBreeze === sATexpected.toBreeze) - assert(sAT.toDense.toBreeze === dATexpected.toBreeze) + assert(dAT.toSparse.asBreeze === sATexpected.asBreeze) + assert(sAT.toDense.asBreeze === dATexpected.asBreeze) } test("foreachActive") { diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala index 887814b5e7..614be460a4 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala @@ -230,7 +230,7 @@ class VectorsSuite extends SparkMLFunSuite { val denseVector1 = Vectors.dense(sparseVector1.toArray) val denseVector2 = Vectors.dense(sparseVector2.toArray) - val squaredDist = breezeSquaredDistance(sparseVector1.toBreeze, sparseVector2.toBreeze) + val squaredDist = breezeSquaredDistance(sparseVector1.asBreeze, sparseVector2.asBreeze) // SparseVector vs. SparseVector assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index 0a569c4917..576584c627 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -616,8 +616,8 @@ private[ann] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int) data.map { v => (0.0, Vectors.fromBreeze(BDV.vertcat( - v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector)) + v._1.asBreeze.toDenseVector, + v._2.asBreeze.toDenseVector)) ) } } else { data.mapPartitions { it => @@ -665,8 +665,8 @@ private[ann] class ANNUpdater extends Updater { iter: Int, regParam: Double): (OldVector, Double) = { val thisIterStepSize = stepSize - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - Baxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) + val brzWeights: BV[Double] = weightsOld.asBreeze.toDenseVector + Baxpy(-thisIterStepSize, gradient.asBreeze, brzWeights) (OldVectors.fromBreeze(brzWeights), 0) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 61e355ab9f..cecf3c43df 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -400,7 +400,7 @@ class LogisticRegression @Since("1.2.0") ( } val states = optimizer.iterations(new CachedDiffFunction(costFun), - initialCoefficientsWithIntercept.toBreeze.toDenseVector) + initialCoefficientsWithIntercept.asBreeze.toDenseVector) /* Note that in Logistic Regression, the objective history (loss + regularization) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 773e50e245..563a3b14e9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -113,7 +113,7 @@ class GaussianMixtureModel private[ml] ( private[clustering] def predictProbability(features: Vector): Vector = { val probs: Array[Double] = - GaussianMixtureModel.computeProbabilities(features.toBreeze.toDenseVector, gaussians, weights) + GaussianMixtureModel.computeProbabilities(features.asBreeze.toDenseVector, gaussians, weights) Vectors.dense(probs) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index 0dffba93ac..1b5159902e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -122,7 +122,7 @@ class MaxAbsScalerModel private[ml] ( // TODO: this looks hack, we may have to handle sparse and dense vectors separately. val maxAbsUnzero = Vectors.dense(maxAbs.toArray.map(x => if (x == 0) 1 else x)) val reScale = udf { (vector: Vector) => - val brz = vector.toBreeze / maxAbsUnzero.toBreeze + val brz = vector.asBreeze / maxAbsUnzero.asBreeze Vectors.fromBreeze(brz) } dataset.withColumn($(outputCol), reScale(col($(inputCol)))) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index c6ff639f29..d15f1b8563 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -162,7 +162,7 @@ class MinMaxScalerModel private[ml] ( @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { - val originalRange = (originalMax.toBreeze - originalMin.toBreeze).toArray + val originalRange = (originalMax.asBreeze - originalMin.asBreeze).toArray val minArray = originalMin.toArray val reScale = udf { (vector: Vector) => diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 00ef6ccc74..c440073842 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -223,7 +223,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S val initialParameters = Vectors.zeros(numFeatures + 2) val states = optimizer.iterations(new CachedDiffFunction(costFun), - initialParameters.toBreeze.toDenseVector) + initialParameters.asBreeze.toDenseVector) val parameters = { val arrayBuilder = mutable.ArrayBuilder.make[Double] diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 4d66b0eb37..6be2584785 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -297,7 +297,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String val initialCoefficients = Vectors.zeros(numFeatures) val states = optimizer.iterations(new CachedDiffFunction(costFun), - initialCoefficients.toBreeze.toDenseVector) + initialCoefficients.asBreeze.toDenseVector) val (coefficients, objectiveHistory) = { /* diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index a8d3fd4177..783c1c835b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -72,7 +72,7 @@ class SVMModel @Since("1.1.0") ( dataMatrix: Vector, weightMatrix: Vector, intercept: Double) = { - val margin = weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept + val margin = weightMatrix.asBreeze.dot(dataMatrix.asBreeze) + intercept threshold match { case Some(t) => if (margin > t) 1.0 else 0.0 case None => margin diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala index f04c87259c..a214b1a26f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala @@ -166,7 +166,7 @@ class GaussianMixture private ( val sc = data.sparkContext // we will operate on the data as breeze data - val breezeData = data.map(_.toBreeze).cache() + val breezeData = data.map(_.asBreeze).cache() // Get length of the input vectors val d = breezeData.first().length diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index f470b0f3c3..31ad56dba6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -96,7 +96,7 @@ class GaussianMixtureModel @Since("1.3.0") ( val bcDists = sc.broadcast(gaussians) val bcWeights = sc.broadcast(weights) points.map { x => - computeSoftAssignments(x.toBreeze.toDenseVector, bcDists.value, bcWeights.value, k) + computeSoftAssignments(x.asBreeze.toDenseVector, bcDists.value, bcWeights.value, k) } } @@ -105,7 +105,7 @@ class GaussianMixtureModel @Since("1.3.0") ( */ @Since("1.4.0") def predictSoft(point: Vector): Array[Double] = { - computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k) + computeSoftAssignments(point.asBreeze.toDenseVector, gaussians, weights, k) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index 0a515f893d..1b66013d54 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -205,7 +205,7 @@ class LocalLDAModel private[spark] ( @Since("1.3.0") override def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])] = { - val brzTopics = topics.toBreeze.toDenseMatrix + val brzTopics = topics.asBreeze.toDenseMatrix Range(0, k).map { topicIndex => val topic = normalize(brzTopics(::, topicIndex), 1.0) val (termWeights, terms) = @@ -233,7 +233,7 @@ class LocalLDAModel private[spark] ( */ @Since("1.5.0") def logLikelihood(documents: RDD[(Long, Vector)]): Double = logLikelihoodBound(documents, - docConcentration, topicConcentration, topicsMatrix.toBreeze.toDenseMatrix, gammaShape, k, + docConcentration, topicConcentration, topicsMatrix.asBreeze.toDenseMatrix, gammaShape, k, vocabSize) /** @@ -291,7 +291,7 @@ class LocalLDAModel private[spark] ( gammaShape: Double, k: Int, vocabSize: Long): Double = { - val brzAlpha = alpha.toBreeze.toDenseVector + val brzAlpha = alpha.asBreeze.toDenseVector // transpose because dirichletExpectation normalizes by row and we need to normalize // by topic (columns of lambda) val Elogbeta = LDAUtils.dirichletExpectation(lambda.t).t @@ -344,9 +344,9 @@ class LocalLDAModel private[spark] ( def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = { // Double transpose because dirichletExpectation normalizes by row and we need to normalize // by topic (columns of lambda) - val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.toBreeze.toDenseMatrix.t).t) + val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.asBreeze.toDenseMatrix.t).t) val expElogbetaBc = documents.sparkContext.broadcast(expElogbeta) - val docConcentrationBrz = this.docConcentration.toBreeze + val docConcentrationBrz = this.docConcentration.asBreeze val gammaShape = this.gammaShape val k = this.k @@ -367,9 +367,9 @@ class LocalLDAModel private[spark] ( /** Get a method usable as a UDF for [[topicDistributions()]] */ private[spark] def getTopicDistributionMethod(sc: SparkContext): Vector => Vector = { - val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.toBreeze.toDenseMatrix.t).t) + val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.asBreeze.toDenseMatrix.t).t) val expElogbetaBc = sc.broadcast(expElogbeta) - val docConcentrationBrz = this.docConcentration.toBreeze + val docConcentrationBrz = this.docConcentration.asBreeze val gammaShape = this.gammaShape val k = this.k @@ -399,14 +399,14 @@ class LocalLDAModel private[spark] ( */ @Since("2.0.0") def topicDistribution(document: Vector): Vector = { - val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.toBreeze.toDenseMatrix.t).t) + val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.asBreeze.toDenseMatrix.t).t) if (document.numNonzeros == 0) { Vectors.zeros(this.k) } else { val (gamma, _, _) = OnlineLDAOptimizer.variationalTopicInference( document, expElogbeta, - this.docConcentration.toBreeze, + this.docConcentration.asBreeze, gammaShape, this.k) Vectors.dense(normalize(gamma, 1.0).toArray) @@ -456,7 +456,7 @@ object LocalLDAModel extends Loader[LocalLDAModel] { ("gammaShape" -> gammaShape))) sc.parallelize(Seq(metadata), 1).saveAsTextFile(Loader.metadataPath(path)) - val topicsDenseMatrix = topicsMatrix.toBreeze.toDenseMatrix + val topicsDenseMatrix = topicsMatrix.asBreeze.toDenseMatrix val topics = Range(0, k).map { topicInd => Data(Vectors.dense((topicsDenseMatrix(::, topicInd).toArray)), topicInd) } @@ -480,7 +480,7 @@ object LocalLDAModel extends Loader[LocalLDAModel] { val brzTopics = BDM.zeros[Double](vocabSize, k) topics.foreach { case Row(vec: Vector, ind: Int) => - brzTopics(::, ind) := vec.toBreeze + brzTopics(::, ind) := vec.asBreeze } val topicsMat = Matrices.fromBreeze(brzTopics) @@ -896,9 +896,9 @@ object DistributedLDAModel extends Loader[DistributedLDAModel] { Loader.checkSchema[VertexData](vertexDataFrame.schema) Loader.checkSchema[EdgeData](edgeDataFrame.schema) val globalTopicTotals: LDA.TopicCounts = - dataFrame.first().getAs[Vector](0).toBreeze.toDenseVector + dataFrame.first().getAs[Vector](0).asBreeze.toDenseVector val vertices: RDD[(VertexId, LDA.TopicCounts)] = vertexDataFrame.rdd.map { - case Row(ind: Long, vec: Vector) => (ind, vec.toBreeze.toDenseVector) + case Row(ind: Long, vec: Vector) => (ind, vec.asBreeze.toDenseVector) } val edges: RDD[Edge[LDA.TokenCount]] = edgeDataFrame.rdd.map { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index 1b3e2f600d..2436efba32 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -137,7 +137,7 @@ final class EMLDAOptimizer extends LDAOptimizer { // For each document, create an edge (Document -> Term) for each unique term in the document. val edges: RDD[Edge[TokenCount]] = docs.flatMap { case (docID: Long, termCounts: Vector) => // Add edges for terms with non-zero counts. - termCounts.toBreeze.activeIterator.filter(_._2 != 0.0).map { case (term, cnt) => + termCounts.asBreeze.activeIterator.filter(_._2 != 0.0).map { case (term, cnt) => Edge(docID, term2index(term), cnt) } } @@ -457,7 +457,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer { val vocabSize = this.vocabSize val expElogbeta = exp(LDAUtils.dirichletExpectation(lambda)).t val expElogbetaBc = batch.sparkContext.broadcast(expElogbeta) - val alpha = this.alpha.toBreeze + val alpha = this.alpha.asBreeze val gammaShape = this.gammaShape val stats: RDD[(BDM[Double], List[BDV[Double]])] = batch.mapPartitions { docs => @@ -507,7 +507,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer { private def updateAlpha(gammat: BDM[Double]): Unit = { val weight = rho() val N = gammat.rows.toDouble - val alpha = this.alpha.toBreeze.toDenseVector + val alpha = this.alpha.asBreeze.toDenseVector val logphat: BDM[Double] = sum(LDAUtils.dirichletExpectation(gammat)(::, breeze.linalg.*)) / N val gradf = N * (-LDAUtils.dirichletExpectation(alpha) + logphat.toDenseVector) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index 24e1cff0dc..52bdccb919 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -135,8 +135,8 @@ class StreamingKMeansModel @Since("1.2.0") ( while (j < dim) { val x = largestClusterCenter(j) val p = 1e-14 * math.max(math.abs(x), 1.0) - largestClusterCenter.toBreeze(j) = x + p - smallestClusterCenter.toBreeze(j) = x - p + largestClusterCenter.asBreeze(j) = x + p + smallestClusterCenter.asBreeze(j) = x - p j += 1 } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index ee1956c2d4..e8f34388cd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -75,7 +75,7 @@ sealed trait Matrix extends Serializable { def rowIter: Iterator[Vector] = this.transpose.colIter /** Converts to a breeze matrix. */ - private[mllib] def toBreeze: BM[Double] + private[mllib] def asBreeze: BM[Double] /** Gets the (i, j)-th element. */ @Since("1.3.0") @@ -118,11 +118,11 @@ sealed trait Matrix extends Serializable { } /** A human readable representation of the matrix */ - override def toString: String = toBreeze.toString() + override def toString: String = asBreeze.toString() /** A human readable representation of the matrix with maximum lines and width */ @Since("1.4.0") - def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth) + def toString(maxLines: Int, maxLineWidth: Int): String = asBreeze.toString(maxLines, maxLineWidth) /** * Map the values of this matrix using a function. Generates a new matrix. Performs the @@ -300,7 +300,7 @@ class DenseMatrix @Since("1.3.0") ( this(numRows, numCols, values, false) override def equals(o: Any): Boolean = o match { - case m: Matrix => toBreeze == m.toBreeze + case m: Matrix => asBreeze == m.asBreeze case _ => false } @@ -308,7 +308,7 @@ class DenseMatrix @Since("1.3.0") ( com.google.common.base.Objects.hashCode(numRows: Integer, numCols: Integer, toArray) } - private[mllib] def toBreeze: BM[Double] = { + private[mllib] def asBreeze: BM[Double] = { if (!isTransposed) { new BDM[Double](numRows, numCols, values) } else { @@ -607,13 +607,13 @@ class SparseMatrix @Since("1.3.0") ( values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false) override def equals(o: Any): Boolean = o match { - case m: Matrix => toBreeze == m.toBreeze + case m: Matrix => asBreeze == m.asBreeze case _ => false } - override def hashCode(): Int = toBreeze.hashCode + override def hashCode(): Int = asBreeze.hashCode - private[mllib] def toBreeze: BM[Double] = { + private[mllib] def asBreeze: BM[Double] = { if (!isTransposed) { new BSM[Double](values, numRows, numCols, colPtrs, rowIndices) } else { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 7ebcd297bd..02fd60da7d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -103,14 +103,14 @@ sealed trait Vector extends Serializable { /** * Converts the instance to a breeze vector. */ - private[spark] def toBreeze: BV[Double] + private[spark] def asBreeze: BV[Double] /** * Gets the value of the ith element. * @param i index */ @Since("1.1.0") - def apply(i: Int): Double = toBreeze(i) + def apply(i: Int): Double = asBreeze(i) /** * Makes a deep copy of this vector. @@ -610,7 +610,7 @@ class DenseVector @Since("1.0.0") ( @Since("1.0.0") override def toArray: Array[Double] = values - private[spark] override def toBreeze: BV[Double] = new BDV[Double](values) + private[spark] override def asBreeze: BV[Double] = new BDV[Double](values) @Since("1.0.0") override def apply(i: Int): Double = values(i) @@ -770,7 +770,7 @@ class SparseVector @Since("1.0.0") ( new SparseVector(size, indices.clone(), values.clone()) } - private[spark] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size) + private[spark] override def asBreeze: BV[Double] = new BSV[Double](indices, values, size) @Since("1.6.0") override def foreachActive(f: (Int, Double) => Unit): Unit = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 580d7a98fb..7a24617781 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -275,7 +275,7 @@ class BlockMatrix @Since("1.3.0") ( val rows = blocks.flatMap { case ((blockRowIdx, blockColIdx), mat) => mat.rowIter.zipWithIndex.map { case (vector, rowIdx) => - blockRowIdx * rowsPerBlock + rowIdx -> (blockColIdx, vector.toBreeze) + blockRowIdx * rowsPerBlock + rowIdx -> (blockColIdx, vector.asBreeze) } }.groupByKey().map { case (rowIdx, vectors) => val numberNonZeroPerRow = vectors.map(_._2.activeSize).sum.toDouble / cols.toDouble @@ -367,12 +367,12 @@ class BlockMatrix @Since("1.3.0") ( } if (a.isEmpty) { val zeroBlock = BM.zeros[Double](b.head.numRows, b.head.numCols) - val result = binMap(zeroBlock, b.head.toBreeze) + val result = binMap(zeroBlock, b.head.asBreeze) new MatrixBlock((blockRowIndex, blockColIndex), Matrices.fromBreeze(result)) } else if (b.isEmpty) { new MatrixBlock((blockRowIndex, blockColIndex), a.head) } else { - val result = binMap(a.head.toBreeze, b.head.toBreeze) + val result = binMap(a.head.asBreeze, b.head.asBreeze) new MatrixBlock((blockRowIndex, blockColIndex), Matrices.fromBreeze(result)) } } @@ -479,7 +479,7 @@ class BlockMatrix @Since("1.3.0") ( case _ => throw new SparkException(s"Unrecognized matrix type ${rightBlock.getClass}.") } - ((leftRowIndex, rightColIndex), C.toBreeze) + ((leftRowIndex, rightColIndex), C.asBreeze) } } }.reduceByKey(resultPartitioner, (a, b) => a + b).mapValues(Matrices.fromBreeze) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 4b8ed301eb..cd5209d0eb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -92,7 +92,7 @@ class RowMatrix @Since("1.0.0") ( val vbr = rows.context.broadcast(v) rows.treeAggregate(BDV.zeros[Double](n))( seqOp = (U, r) => { - val rBrz = r.toBreeze + val rBrz = r.asBreeze val a = rBrz.dot(vbr.value) rBrz match { // use specialized axpy for better performance @@ -250,12 +250,12 @@ class RowMatrix @Since("1.0.0") ( val (sigmaSquares: BDV[Double], u: BDM[Double]) = computeMode match { case SVDMode.LocalARPACK => require(k < n, s"k must be smaller than n in local-eigs mode but got k=$k and n=$n.") - val G = computeGramianMatrix().toBreeze.asInstanceOf[BDM[Double]] + val G = computeGramianMatrix().asBreeze.asInstanceOf[BDM[Double]] EigenValueDecomposition.symmetricEigs(v => G * v, n, k, tol, maxIter) case SVDMode.LocalLAPACK => // breeze (v0.10) svd latent constraint, 7 * n * n + 4 * n < Int.MaxValue require(n < 17515, s"$n exceeds the breeze svd capability") - val G = computeGramianMatrix().toBreeze.asInstanceOf[BDM[Double]] + val G = computeGramianMatrix().asBreeze.asInstanceOf[BDM[Double]] val brzSvd.SVD(uFull: BDM[Double], sigmaSquaresFull: BDV[Double], _) = brzSvd(G) (sigmaSquaresFull, uFull) case SVDMode.DistARPACK => @@ -338,7 +338,7 @@ class RowMatrix @Since("1.0.0") ( // large but Cov(X, Y) is small, but it is good for sparse computation. // TODO: find a fast and stable way for sparse data. - val G = computeGramianMatrix().toBreeze + val G = computeGramianMatrix().asBreeze var i = 0 var j = 0 @@ -381,7 +381,7 @@ class RowMatrix @Since("1.0.0") ( val n = numCols().toInt require(k > 0 && k <= n, s"k = $k out of range (0, n = $n]") - val Cov = computeCovariance().toBreeze.asInstanceOf[BDM[Double]] + val Cov = computeCovariance().asBreeze.asInstanceOf[BDM[Double]] val brzSvd.SVD(u: BDM[Double], s: BDV[Double], _) = brzSvd(Cov) @@ -436,14 +436,14 @@ class RowMatrix @Since("1.0.0") ( require(B.isInstanceOf[DenseMatrix], s"Only support dense matrix at this time but found ${B.getClass.getName}.") - val Bb = rows.context.broadcast(B.toBreeze.asInstanceOf[BDM[Double]].toDenseVector.toArray) + val Bb = rows.context.broadcast(B.asBreeze.asInstanceOf[BDM[Double]].toDenseVector.toArray) val AB = rows.mapPartitions { iter => val Bi = Bb.value iter.map { row => val v = BDV.zeros[Double](k) var i = 0 while (i < k) { - v(i) = row.toBreeze.dot(new BDV(Bi, i * n, 1, n)) + v(i) = row.asBreeze.dot(new BDV(Bi, i * n, 1, n)) i += 1 } Vectors.fromBreeze(v) @@ -541,7 +541,7 @@ class RowMatrix @Since("1.0.0") ( val bdm = BDM.zeros[Double](partRows.length, col) var i = 0 partRows.foreach { row => - bdm(i, ::) := row.toBreeze.t + bdm(i, ::) := row.asBreeze.t i += 1 } breeze.linalg.qr.reduced(bdm).r diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 735e780909..480a64548c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -301,8 +301,8 @@ object GradientDescent extends Logging { currentWeights: Vector, convergenceTol: Double): Boolean = { // To compare with convergence tolerance. - val previousBDV = previousWeights.toBreeze.toDenseVector - val currentBDV = currentWeights.toBreeze.toDenseVector + val previousBDV = previousWeights.asBreeze.toDenseVector + val currentBDV = currentWeights.asBreeze.toDenseVector // This represents the difference of updated weights in the iteration. val solutionVecDiff: Double = norm(previousBDV - currentBDV) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index 74e2cad76c..ec6ffe6e19 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -200,7 +200,7 @@ object LBFGS extends Logging { val lbfgs = new BreezeLBFGS[BDV[Double]](maxNumIterations, numCorrections, convergenceTol) val states = - lbfgs.iterations(new CachedDiffFunction(costFun), initialWeights.toBreeze.toDenseVector) + lbfgs.iterations(new CachedDiffFunction(costFun), initialWeights.asBreeze.toDenseVector) /** * NOTE: lossSum and loss is computed using the weights from the previous iteration @@ -281,7 +281,7 @@ object LBFGS extends Logging { // gradientTotal = gradientSum / numExamples + gradientTotal axpy(1.0 / numExamples, gradientSum, gradientTotal) - (loss, gradientTotal.toBreeze.asInstanceOf[BDV[Double]]) + (loss, gradientTotal.asBreeze.asInstanceOf[BDV[Double]]) } } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index 03c01e0553..67d484575d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -75,8 +75,8 @@ class SimpleUpdater extends Updater { iter: Int, regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize / math.sqrt(iter) - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) + val brzWeights: BV[Double] = weightsOld.asBreeze.toDenseVector + brzAxpy(-thisIterStepSize, gradient.asBreeze, brzWeights) (Vectors.fromBreeze(brzWeights), 0) } @@ -87,7 +87,7 @@ class SimpleUpdater extends Updater { * Updater for L1 regularized problems. * R(w) = ||w||_1 * Uses a step-size decreasing with the square root of the number of iterations. - + * * Instead of subgradient of the regularizer, the proximal operator for the * L1 regularization is applied after the gradient step. This is known to * result in better sparsity of the intermediate solution. @@ -111,8 +111,8 @@ class L1Updater extends Updater { regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize / math.sqrt(iter) // Take gradient step - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) + val brzWeights: BV[Double] = weightsOld.asBreeze.toDenseVector + brzAxpy(-thisIterStepSize, gradient.asBreeze, brzWeights) // Apply proximal operator (soft thresholding) val shrinkageVal = regParam * thisIterStepSize var i = 0 @@ -146,9 +146,9 @@ class SquaredL2Updater extends Updater { // w' = w - thisIterStepSize * (gradient + regParam * w) // w' = (1 - thisIterStepSize * regParam) * w - thisIterStepSize * gradient val thisIterStepSize = stepSize / math.sqrt(iter) - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector + val brzWeights: BV[Double] = weightsOld.asBreeze.toDenseVector brzWeights :*= (1.0 - thisIterStepSize * regParam) - brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) + brzAxpy(-thisIterStepSize, gradient.asBreeze, brzWeights) val norm = brzNorm(brzWeights, 2.0) (Vectors.fromBreeze(brzWeights), 0.5 * regParam * norm * norm) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index e14bddf97d..cef1b4f51b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -44,7 +44,7 @@ class LassoModel @Since("1.1.0") ( dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double = { - weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept + weightMatrix.asBreeze.dot(dataMatrix.asBreeze) + intercept } @Since("1.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index 2ceac4b8cc..60262fdc49 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -44,7 +44,7 @@ class LinearRegressionModel @Since("1.1.0") ( dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double = { - weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept + weightMatrix.asBreeze.dot(dataMatrix.asBreeze) + intercept } @Since("1.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 301f02fd98..52977ac4f0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -45,7 +45,7 @@ class RidgeRegressionModel @Since("1.1.0") ( dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double = { - weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept + weightMatrix.asBreeze.dot(dataMatrix.asBreeze) + intercept } @Since("1.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/PearsonCorrelation.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/PearsonCorrelation.scala index 515be0b817..e478c31bc9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/PearsonCorrelation.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/correlation/PearsonCorrelation.scala @@ -55,7 +55,7 @@ private[stat] object PearsonCorrelation extends Correlation with Logging { * 0 variance results in a correlation value of Double.NaN. */ def computeCorrelationMatrixFromCovariance(covarianceMatrix: Matrix): Matrix = { - val cov = covarianceMatrix.toBreeze.asInstanceOf[BDM[Double]] + val cov = covarianceMatrix.asBreeze.asInstanceOf[BDM[Double]] val n = cov.cols // Compute the standard deviation on the diagonals first diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala index c806d6bdf6..39c3644450 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala @@ -42,7 +42,7 @@ class MultivariateGaussian @Since("1.3.0") ( require(sigma.numCols == sigma.numRows, "Covariance matrix must be square") require(mu.size == sigma.numCols, "Mean vector length must match covariance matrix size") - private val breezeMu = mu.toBreeze.toDenseVector + private val breezeMu = mu.asBreeze.toDenseVector /** * private[mllib] constructor @@ -66,7 +66,7 @@ class MultivariateGaussian @Since("1.3.0") ( */ @Since("1.3.0") def pdf(x: Vector): Double = { - pdf(x.toBreeze) + pdf(x.asBreeze) } /** @@ -74,7 +74,7 @@ class MultivariateGaussian @Since("1.3.0") ( */ @Since("1.3.0") def logpdf(x: Vector): Double = { - logpdf(x.toBreeze) + logpdf(x.asBreeze) } /** Returns density of this multivariate Gaussian at given point, x */ @@ -118,7 +118,7 @@ class MultivariateGaussian @Since("1.3.0") ( * relation to the maximum singular value (same tolerance used by, e.g., Octave). */ private def calculateCovarianceConstants: (DBM[Double], Double) = { - val eigSym.EigSym(d, u) = eigSym(sigma.toBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t + val eigSym.EigSym(d, u) = eigSym(sigma.asBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t // For numerical stability, values are considered to be non-zero only if they exceed tol. // This prevents any inverted value from exceeding (eps * n * max(d))^-1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala index 76ca6a8abd..da5df9bf45 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala @@ -110,7 +110,7 @@ private[stat] object ChiSqTest extends Logging { } i += 1 distinctLabels += label - val brzFeatures = features.toBreeze + val brzFeatures = features.asBreeze (startCol until endCol).map { col => val feature = brzFeatures(col) allDistinctFeatures(col) += feature diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala index ff52115ec0..04c010bd13 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala @@ -69,7 +69,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa } def expectedMultinomialProbabilities(model: NaiveBayesModel, feature: Vector): Vector = { - val logClassProbs: BV[Double] = model.pi.toBreeze + model.theta.multiply(feature).toBreeze + val logClassProbs: BV[Double] = model.pi.asBreeze + model.theta.multiply(feature).asBreeze val classProbs = logClassProbs.toArray.map(math.exp) val classProbsSum = classProbs.sum Vectors.dense(classProbs.map(_ / classProbsSum)) @@ -78,8 +78,8 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with Defa def expectedBernoulliProbabilities(model: NaiveBayesModel, feature: Vector): Vector = { val negThetaMatrix = model.theta.map(v => math.log(1.0 - math.exp(v))) val negFeature = Vectors.dense(feature.toArray.map(v => 1.0 - v)) - val piTheta: BV[Double] = model.pi.toBreeze + model.theta.multiply(feature).toBreeze - val logClassProbs: BV[Double] = piTheta + negThetaMatrix.multiply(negFeature).toBreeze + val piTheta: BV[Double] = model.pi.asBreeze + model.theta.multiply(feature).asBreeze + val logClassProbs: BV[Double] = piTheta + negThetaMatrix.multiply(negFeature).asBreeze val classProbs = logClassProbs.toArray.map(math.exp) val classProbsSum = classProbs.sum Vectors.dense(classProbs.map(_ / classProbsSum)) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index 28fada7053..5cf4377768 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -411,10 +411,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w val testRDD1 = sc.parallelize(testData, 2) val testRDD2 = sc.parallelize( - testData.map(x => LabeledPoint(x.label, Vectors.fromBreeze(x.features.toBreeze * 1.0E3))), 2) + testData.map(x => LabeledPoint(x.label, Vectors.fromBreeze(x.features.asBreeze * 1.0E3))), 2) val testRDD3 = sc.parallelize( - testData.map(x => LabeledPoint(x.label, Vectors.fromBreeze(x.features.toBreeze * 1.0E6))), 2) + testData.map(x => LabeledPoint(x.label, Vectors.fromBreeze(x.features.asBreeze * 1.0E6))), 2) testRDD1.cache() testRDD2.cache() diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index ab54cb06d5..0c0aefc52b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -182,7 +182,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext { val piVector = new BDV(model.pi) // model.theta is row-major; treat it as col-major representation of transpose, and transpose: val thetaMatrix = new BDM(model.theta(0).length, model.theta.length, model.theta.flatten).t - val logClassProbs: BV[Double] = piVector + (thetaMatrix * testData.toBreeze) + val logClassProbs: BV[Double] = piVector + (thetaMatrix * testData.asBreeze) val classProbs = logClassProbs.toArray.map(math.exp) val classProbsSum = classProbs.sum classProbs.map(_ / classProbsSum) @@ -234,7 +234,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext { val thetaMatrix = new BDM(model.theta(0).length, model.theta.length, model.theta.flatten).t val negThetaMatrix = new BDM(model.theta(0).length, model.theta.length, model.theta.flatten.map(v => math.log(1.0 - math.exp(v)))).t - val testBreeze = testData.toBreeze + val testBreeze = testData.asBreeze val negTestBreeze = new BDV(Array.fill(testBreeze.size)(1.0)) - testBreeze val piTheta: BV[Double] = piVector + (thetaMatrix * testBreeze) val logClassProbs: BV[Double] = piTheta + (negThetaMatrix * negTestBreeze) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index ea23196d2c..eb050158d4 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -116,7 +116,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext { case (docId, (topicDistribution, (indices, weights))) => assert(indices.length == 2) assert(weights.length == 2) - val bdvTopicDist = topicDistribution.toBreeze + val bdvTopicDist = topicDistribution.asBreeze val top2Indices = argtopk(bdvTopicDist, 2) assert(top2Indices.toArray === indices) assert(bdvTopicDist(top2Indices).toArray === weights) @@ -369,7 +369,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext { val actualPredictions = ldaModel.topicDistributions(docs).cache() val topTopics = actualPredictions.map { case (id, topics) => // convert results to expectedPredictions format, which only has highest probability topic - val topicsBz = topics.toBreeze.toDenseVector + val topicsBz = topics.asBreeze.toDenseVector (id, (argmax(topicsBz), max(topicsBz))) }.sortByKey() .values diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala index 65e37c64d4..fdaa098345 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala @@ -67,7 +67,7 @@ class StreamingKMeansSuite extends SparkFunSuite with TestSuiteBase { // estimated center from streaming should exactly match the arithmetic mean of all data points // because the decay factor is set to 1.0 val grandMean = - input.flatten.map(x => x.toBreeze).reduce(_ + _) / (numBatches * numPoints).toDouble + input.flatten.map(x => x.asBreeze).reduce(_ + _) / (numBatches * numPoints).toDouble assert(model.latestModel().clusterCenters(0) ~== Vectors.dense(grandMean.toArray) absTol 1E-5) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala index 34122d6ed2..10f7bafd6c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala @@ -51,10 +51,10 @@ class NormalizerSuite extends SparkFunSuite with MLlibTestSparkContext { assert((data1, data1RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) - assert(brzNorm(data1(0).toBreeze, 1) ~== 1.0 absTol 1E-5) - assert(brzNorm(data1(2).toBreeze, 1) ~== 1.0 absTol 1E-5) - assert(brzNorm(data1(3).toBreeze, 1) ~== 1.0 absTol 1E-5) - assert(brzNorm(data1(4).toBreeze, 1) ~== 1.0 absTol 1E-5) + assert(brzNorm(data1(0).asBreeze, 1) ~== 1.0 absTol 1E-5) + assert(brzNorm(data1(2).asBreeze, 1) ~== 1.0 absTol 1E-5) + assert(brzNorm(data1(3).asBreeze, 1) ~== 1.0 absTol 1E-5) + assert(brzNorm(data1(4).asBreeze, 1) ~== 1.0 absTol 1E-5) assert(data1(0) ~== Vectors.sparse(3, Seq((0, -0.465116279), (1, 0.53488372))) absTol 1E-5) assert(data1(1) ~== Vectors.dense(0.0, 0.0, 0.0) absTol 1E-5) @@ -78,10 +78,10 @@ class NormalizerSuite extends SparkFunSuite with MLlibTestSparkContext { assert((data2, data2RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) - assert(brzNorm(data2(0).toBreeze, 2) ~== 1.0 absTol 1E-5) - assert(brzNorm(data2(2).toBreeze, 2) ~== 1.0 absTol 1E-5) - assert(brzNorm(data2(3).toBreeze, 2) ~== 1.0 absTol 1E-5) - assert(brzNorm(data2(4).toBreeze, 2) ~== 1.0 absTol 1E-5) + assert(brzNorm(data2(0).asBreeze, 2) ~== 1.0 absTol 1E-5) + assert(brzNorm(data2(2).asBreeze, 2) ~== 1.0 absTol 1E-5) + assert(brzNorm(data2(3).asBreeze, 2) ~== 1.0 absTol 1E-5) + assert(brzNorm(data2(4).asBreeze, 2) ~== 1.0 absTol 1E-5) assert(data2(0) ~== Vectors.sparse(3, Seq((0, -0.65617871), (1, 0.75460552))) absTol 1E-5) assert(data2(1) ~== Vectors.dense(0.0, 0.0, 0.0) absTol 1E-5) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala index de2c3c13bd..9e4735afdd 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.SparkFunSuite class BreezeMatrixConversionSuite extends SparkFunSuite { test("dense matrix to breeze") { val mat = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) - val breeze = mat.toBreeze.asInstanceOf[BDM[Double]] + val breeze = mat.asBreeze.asInstanceOf[BDM[Double]] assert(breeze.rows === mat.numRows) assert(breeze.cols === mat.numCols) assert(breeze.data.eq(mat.asInstanceOf[DenseMatrix].values), "should not copy data") @@ -48,7 +48,7 @@ class BreezeMatrixConversionSuite extends SparkFunSuite { val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 1, 2) val mat = Matrices.sparse(3, 2, colPtrs, rowIndices, values) - val breeze = mat.toBreeze.asInstanceOf[BSM[Double]] + val breeze = mat.asBreeze.asInstanceOf[BSM[Double]] assert(breeze.rows === mat.numRows) assert(breeze.cols === mat.numCols) assert(breeze.data.eq(mat.asInstanceOf[SparseMatrix].values), "should not copy data") diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala index 3772c9235a..996f621f18 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala @@ -33,12 +33,12 @@ class BreezeVectorConversionSuite extends SparkFunSuite { test("dense to breeze") { val vec = Vectors.dense(arr) - assert(vec.toBreeze === new BDV[Double](arr)) + assert(vec.asBreeze === new BDV[Double](arr)) } test("sparse to breeze") { val vec = Vectors.sparse(n, indices, values) - assert(vec.toBreeze === new BSV[Double](indices, values, n)) + assert(vec.asBreeze === new BSV[Double](indices, values, n)) } test("dense breeze to vector") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 8c5b4bda25..d0c4dd28e1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -63,7 +63,7 @@ class MatricesSuite extends SparkFunSuite { (1, 2, 2.0), (2, 2, 2.0), (1, 2, 2.0), (0, 0, 0.0)) val mat2 = SparseMatrix.fromCOO(m, n, entries) - assert(mat.toBreeze === mat2.toBreeze) + assert(mat.asBreeze === mat2.asBreeze) assert(mat2.values.length == 4) } @@ -176,8 +176,8 @@ class MatricesSuite extends SparkFunSuite { val spMat2 = deMat1.toSparse val deMat2 = spMat1.toDense - assert(spMat1.toBreeze === spMat2.toBreeze) - assert(deMat1.toBreeze === deMat2.toBreeze) + assert(spMat1.asBreeze === spMat2.asBreeze) + assert(deMat1.asBreeze === deMat2.asBreeze) } test("map, update") { @@ -211,8 +211,8 @@ class MatricesSuite extends SparkFunSuite { val sATexpected = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - assert(dAT.toBreeze === dATexpected.toBreeze) - assert(sAT.toBreeze === sATexpected.toBreeze) + assert(dAT.asBreeze === dATexpected.asBreeze) + assert(sAT.asBreeze === sATexpected.asBreeze) assert(dA(1, 0) === dAT(0, 1)) assert(dA(2, 1) === dAT(1, 2)) assert(sA(1, 0) === sAT(0, 1)) @@ -221,8 +221,8 @@ class MatricesSuite extends SparkFunSuite { assert(!dA.toArray.eq(dAT.toArray), "has to have a new array") assert(dA.values.eq(dAT.transpose.asInstanceOf[DenseMatrix].values), "should not copy array") - assert(dAT.toSparse.toBreeze === sATexpected.toBreeze) - assert(sAT.toDense.toBreeze === dATexpected.toBreeze) + assert(dAT.toSparse.asBreeze === sATexpected.asBreeze) + assert(sAT.toDense.asBreeze === dATexpected.asBreeze) } test("foreachActive") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 2e9c40ab88..71a3ceac1b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -269,7 +269,7 @@ class VectorsSuite extends SparkFunSuite with Logging { val denseVector1 = Vectors.dense(sparseVector1.toArray) val denseVector2 = Vectors.dense(sparseVector2.toArray) - val squaredDist = breezeSquaredDistance(sparseVector1.toBreeze, sparseVector2.toBreeze) + val squaredDist = breezeSquaredDistance(sparseVector1.asBreeze, sparseVector2.asBreeze) // SparseVector vs. SparseVector assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index f37eaf225a..e5a2cbbb58 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -152,7 +152,7 @@ class BlockMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { val C = B.toIndexedRowMatrix.rows.collect - (C(0).vector.toBreeze, C(1).vector.toBreeze) match { + (C(0).vector.asBreeze, C(1).vector.asBreeze) match { case (denseVector: BDV[Double], sparseVector: BSV[Double]) => assert(denseVector.length === sparseVector.length) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala index 5b7ccb9015..99af5fa10d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala @@ -108,7 +108,7 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { val C = A.multiply(B) val localA = A.toBreeze() val localC = C.toBreeze() - val expected = localA * B.toBreeze.asInstanceOf[BDM[Double]] + val expected = localA * B.asBreeze.asInstanceOf[BDM[Double]] assert(localC === expected) } @@ -119,7 +119,7 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { (90.0, 12.0, 24.0), (12.0, 17.0, 22.0), (24.0, 22.0, 30.0)) - assert(G.toBreeze === expected) + assert(G.asBreeze === expected) } test("svd") { @@ -128,8 +128,8 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(svd.U.isInstanceOf[IndexedRowMatrix]) val localA = A.toBreeze() val U = svd.U.toBreeze() - val s = svd.s.toBreeze.asInstanceOf[BDV[Double]] - val V = svd.V.toBreeze.asInstanceOf[BDM[Double]] + val s = svd.s.asBreeze.asInstanceOf[BDV[Double]] + val V = svd.V.asBreeze.asInstanceOf[BDM[Double]] assert(closeToZero(U.t * U - BDM.eye[Double](n))) assert(closeToZero(V.t * V - BDM.eye[Double](n))) assert(closeToZero(U * brzDiag(s) * V.t - localA)) @@ -155,7 +155,7 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { test("similar columns") { val A = new IndexedRowMatrix(indexedRows) - val gram = A.computeGramianMatrix().toBreeze.toDenseMatrix + val gram = A.computeGramianMatrix().asBreeze.toDenseMatrix val G = A.columnSimilarities().toBreeze() diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 2dff52c601..7c4c6d8409 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -96,7 +96,7 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { Matrices.dense(n, n, Array(126.0, 54.0, 72.0, 54.0, 66.0, 78.0, 72.0, 78.0, 94.0)) for (mat <- Seq(denseMat, sparseMat)) { val G = mat.computeGramianMatrix() - assert(G.toBreeze === expected.toBreeze) + assert(G.asBreeze === expected.asBreeze) } } @@ -153,8 +153,8 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(V.numRows === n) assert(V.numCols === k) assertColumnEqualUpToSign(U.toBreeze(), localU, k) - assertColumnEqualUpToSign(V.toBreeze.asInstanceOf[BDM[Double]], localV, k) - assert(closeToZero(s.toBreeze.asInstanceOf[BDV[Double]] - localSigma(0 until k))) + assertColumnEqualUpToSign(V.asBreeze.asInstanceOf[BDM[Double]], localV, k) + assert(closeToZero(s.asBreeze.asInstanceOf[BDV[Double]] - localSigma(0 until k))) } } val svdWithoutU = mat.computeSVD(1, computeU = false, 1e-9, 300, 1e-10, mode) @@ -207,7 +207,7 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { val (pc, expVariance) = mat.computePrincipalComponentsAndExplainedVariance(k) assert(pc.numRows === n) assert(pc.numCols === k) - assertColumnEqualUpToSign(pc.toBreeze.asInstanceOf[BDM[Double]], principalComponents, k) + assertColumnEqualUpToSign(pc.asBreeze.asInstanceOf[BDM[Double]], principalComponents, k) assert( closeToZero(BDV(expVariance.toArray) - BDV(Arrays.copyOfRange(explainedVariance.data, 0, k)))) @@ -256,12 +256,12 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { val calcQ = result.Q val calcR = result.R assert(closeToZero(abs(expected.q) - abs(calcQ.toBreeze()))) - assert(closeToZero(abs(expected.r) - abs(calcR.toBreeze.asInstanceOf[BDM[Double]]))) + assert(closeToZero(abs(expected.r) - abs(calcR.asBreeze.asInstanceOf[BDM[Double]]))) assert(closeToZero(calcQ.multiply(calcR).toBreeze - mat.toBreeze())) // Decomposition without computing Q val rOnly = mat.tallSkinnyQR(computeQ = false) assert(rOnly.Q == null) - assert(closeToZero(abs(expected.r) - abs(rOnly.R.toBreeze.asInstanceOf[BDM[Double]]))) + assert(closeToZero(abs(expected.r) - abs(rOnly.R.asBreeze.asInstanceOf[BDM[Double]]))) } } @@ -269,7 +269,7 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { for (mat <- Seq(denseMat, sparseMat)) { val result = mat.computeCovariance() val expected = breeze.linalg.cov(mat.toBreeze()) - assert(closeToZero(abs(expected) - abs(result.toBreeze.asInstanceOf[BDM[Double]]))) + assert(closeToZero(abs(expected) - abs(result.asBreeze.asInstanceOf[BDM[Double]]))) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala index 700f803490..e32767edb1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala @@ -104,8 +104,8 @@ class CorrelationSuite extends SparkFunSuite with MLlibTestSparkContext with Log (Double.NaN, Double.NaN, 1.00000000, Double.NaN), (0.40047142, 0.91359586, Double.NaN, 1.0000000)) // scalastyle:on - assert(matrixApproxEqual(defaultMat.toBreeze, expected)) - assert(matrixApproxEqual(pearsonMat.toBreeze, expected)) + assert(matrixApproxEqual(defaultMat.asBreeze, expected)) + assert(matrixApproxEqual(pearsonMat.asBreeze, expected)) } test("corr(X) spearman") { @@ -118,7 +118,7 @@ class CorrelationSuite extends SparkFunSuite with MLlibTestSparkContext with Log (Double.NaN, Double.NaN, 1.00000000, Double.NaN), (0.4000000, 0.9486833, Double.NaN, 1.0000000)) // scalastyle:on - assert(matrixApproxEqual(spearmanMat.toBreeze, expected)) + assert(matrixApproxEqual(spearmanMat.asBreeze, expected)) } test("method identification") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala index 0c6aabf192..7b6bfee00c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala @@ -53,13 +53,13 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext { val norm2 = Vectors.norm(v2, 2.0) val v3 = Vectors.sparse(n, indices, indices.map(i => a(i) + 0.5)) val norm3 = Vectors.norm(v3, 2.0) - val squaredDist = breezeSquaredDistance(v1.toBreeze, v2.toBreeze) + val squaredDist = breezeSquaredDistance(v1.asBreeze, v2.asBreeze) val fastSquaredDist1 = fastSquaredDistance(v1, norm1, v2, norm2, precision) assert((fastSquaredDist1 - squaredDist) <= precision * squaredDist, s"failed with m = $m") val fastSquaredDist2 = fastSquaredDistance(v1, norm1, Vectors.dense(v2.toArray), norm2, precision) assert((fastSquaredDist2 - squaredDist) <= precision * squaredDist, s"failed with m = $m") - val squaredDist2 = breezeSquaredDistance(v2.toBreeze, v3.toBreeze) + val squaredDist2 = breezeSquaredDistance(v2.asBreeze, v3.asBreeze) val fastSquaredDist3 = fastSquaredDistance(v2, norm2, v3, norm3, precision) assert((fastSquaredDist3 - squaredDist2) <= precision * squaredDist2, s"failed with m = $m") @@ -67,7 +67,7 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext { val v4 = Vectors.sparse(n, indices.slice(0, m - 10), indices.map(i => a(i) + 0.5).slice(0, m - 10)) val norm4 = Vectors.norm(v4, 2.0) - val squaredDist = breezeSquaredDistance(v2.toBreeze, v4.toBreeze) + val squaredDist = breezeSquaredDistance(v2.asBreeze, v4.asBreeze) val fastSquaredDist = fastSquaredDistance(v2, norm2, v4, norm4, precision) assert((fastSquaredDist - squaredDist) <= precision * squaredDist, s"failed with m = $m") diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 73debe9da4..9d0d9b1be0 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -775,6 +775,9 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.AlphaComponent"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.Experimental"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.annotation.DeveloperApi") + ) ++ Seq( + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.asBreeze"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.asBreeze") ) case v if v.startsWith("1.6") => Seq( -- cgit v1.2.3