diff options
author | Joseph K. Bradley <joseph@databricks.com> | 2016-04-21 16:50:09 -0700 |
---|---|---|
committer | DB Tsai <dbt@netflix.com> | 2016-04-21 16:50:09 -0700 |
commit | f25a3ea8d3ee6972efb925826981918549deacaa (patch) | |
tree | 5365b5f162b41fba8e1786634ccc2c8d585fd47c /mllib/src/test/scala | |
parent | e2b5647ab92eb478b3f7b36a0ce6faf83e24c0e5 (diff) | |
download | spark-f25a3ea8d3ee6972efb925826981918549deacaa.tar.gz spark-f25a3ea8d3ee6972efb925826981918549deacaa.tar.bz2 spark-f25a3ea8d3ee6972efb925826981918549deacaa.zip |
[SPARK-14734][ML][MLLIB] Added asML, fromML methods for all spark.mllib Vector, Matrix types
## What changes were proposed in this pull request?
For maintaining wrappers around spark.mllib algorithms in spark.ml, it will be useful to have ```private[spark]``` methods for converting from one linear algebra representation to another.
This PR adds asML, fromML methods for all spark.mllib Vector and Matrix types.
## How was this patch tested?
Unit tests for all conversions
Author: Joseph K. Bradley <joseph@databricks.com>
Closes #12504 from jkbradley/linalg-conversions.
Diffstat (limited to 'mllib/src/test/scala')
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala | 39 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala | 30 |
2 files changed, 68 insertions, 1 deletion
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index e289724cda..b7df02e6c0 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -19,12 +19,14 @@ package org.apache.spark.mllib.linalg import java.util.Random +import scala.collection.mutable.{Map => MutableMap} + import breeze.linalg.{CSCMatrix, Matrix => BM} import org.mockito.Mockito.when import org.scalatest.mock.MockitoSugar._ -import scala.collection.mutable.{Map => MutableMap} import org.apache.spark.SparkFunSuite +import org.apache.spark.ml.{linalg => newlinalg} import org.apache.spark.mllib.util.TestingUtils._ class MatricesSuite extends SparkFunSuite { @@ -523,4 +525,39 @@ class MatricesSuite extends SparkFunSuite { assert(m.transpose.colIter.toSeq === rows) } } + + test("conversions between new local linalg and mllib linalg") { + val dm: DenseMatrix = new DenseMatrix(3, 2, Array(0.0, 0.0, 1.0, 0.0, 2.0, 3.5)) + val sm: SparseMatrix = dm.toSparse + val sm0: Matrix = sm.asInstanceOf[Matrix] + val dm0: Matrix = dm.asInstanceOf[Matrix] + + def compare(oldM: Matrix, newM: newlinalg.Matrix): Unit = { + assert(oldM.toArray === newM.toArray) + assert(oldM.numCols === newM.numCols) + assert(oldM.numRows === newM.numRows) + } + + val newSM: newlinalg.SparseMatrix = sm.asML + val newDM: newlinalg.DenseMatrix = dm.asML + val newSM0: newlinalg.Matrix = sm0.asML + val newDM0: newlinalg.Matrix = dm0.asML + assert(newSM0.isInstanceOf[newlinalg.SparseMatrix]) + assert(newDM0.isInstanceOf[newlinalg.DenseMatrix]) + compare(sm, newSM) + compare(dm, newDM) + compare(sm0, newSM0) + compare(dm0, newDM0) + + val oldSM: SparseMatrix = SparseMatrix.fromML(newSM) + val oldDM: DenseMatrix = DenseMatrix.fromML(newDM) + val oldSM0: Matrix = Matrices.fromML(newSM0) + val oldDM0: Matrix = Matrices.fromML(newDM0) + 
assert(oldSM0.isInstanceOf[SparseMatrix]) + assert(oldDM0.isInstanceOf[DenseMatrix]) + compare(oldSM, newSM) + compare(oldDM, newDM) + compare(oldSM0, newSM0) + compare(oldDM0, newDM0) + } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index e5567492a2..a7c1a07604 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -24,6 +24,7 @@ import org.json4s.jackson.JsonMethods.{parse => parseJson} import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging +import org.apache.spark.ml.{linalg => newlinalg} import org.apache.spark.mllib.util.TestingUtils._ class VectorsSuite extends SparkFunSuite with Logging { @@ -392,4 +393,33 @@ class VectorsSuite extends SparkFunSuite with Logging { assert(u === v, "toJson/fromJson should preserve vector values.") } } + + test("conversions between new local linalg and mllib linalg") { + val dv: DenseVector = new DenseVector(Array(1.0, 2.0, 3.5)) + val sv: SparseVector = new SparseVector(5, Array(1, 2, 4), Array(1.1, 2.2, 4.4)) + val sv0: Vector = sv.asInstanceOf[Vector] + val dv0: Vector = dv.asInstanceOf[Vector] + + val newSV: newlinalg.SparseVector = sv.asML + val newDV: newlinalg.DenseVector = dv.asML + val newSV0: newlinalg.Vector = sv0.asML + val newDV0: newlinalg.Vector = dv0.asML + assert(newSV0.isInstanceOf[newlinalg.SparseVector]) + assert(newDV0.isInstanceOf[newlinalg.DenseVector]) + assert(sv.toArray === newSV.toArray) + assert(dv.toArray === newDV.toArray) + assert(sv0.toArray === newSV0.toArray) + assert(dv0.toArray === newDV0.toArray) + + val oldSV: SparseVector = SparseVector.fromML(newSV) + val oldDV: DenseVector = DenseVector.fromML(newDV) + val oldSV0: Vector = Vectors.fromML(newSV0) + val oldDV0: Vector = Vectors.fromML(newDV0) + 
assert(oldSV0.isInstanceOf[SparseVector]) + assert(oldDV0.isInstanceOf[DenseVector]) + assert(oldSV.toArray === newSV.toArray) + assert(oldDV.toArray === newDV.toArray) + assert(oldSV0.toArray === newSV0.toArray) + assert(oldDV0.toArray === newDV0.toArray) + } } |