author    Joseph K. Bradley <joseph@databricks.com>  2016-04-21 16:50:09 -0700
committer DB Tsai <dbt@netflix.com>                  2016-04-21 16:50:09 -0700
commit    f25a3ea8d3ee6972efb925826981918549deacaa (patch)
tree      5365b5f162b41fba8e1786634ccc2c8d585fd47c /mllib/src/test/scala
parent    e2b5647ab92eb478b3f7b36a0ce6faf83e24c0e5 (diff)
[SPARK-14734][ML][MLLIB] Added asML, fromML methods for all spark.mllib Vector, Matrix types
## What changes were proposed in this pull request?

For maintaining wrappers around spark.mllib algorithms in spark.ml, it is useful to have ```private[spark]``` methods for converting between the two linear algebra representations. This PR adds asML and fromML methods to all spark.mllib Vector and Matrix types.

## How was this patch tested?

Unit tests covering all of the conversions.

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #12504 from jkbradley/linalg-conversions.
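As a quick illustration of the conversion API described above, here is a minimal sketch of the round trips the tests below exercise, using the same vector and matrix values as the tests. Note that asML and fromML are ```private[spark]```, so code like this only compiles inside the Spark source tree.

```scala
import org.apache.spark.ml.{linalg => newlinalg}
import org.apache.spark.mllib.linalg.{DenseMatrix, DenseVector, Matrices, Matrix}

// spark.mllib -> spark.ml and back again for a dense vector
val oldVec: DenseVector = new DenseVector(Array(1.0, 2.0, 3.5))
val newVec: newlinalg.DenseVector = oldVec.asML        // mllib -> ml
val backVec: DenseVector = DenseVector.fromML(newVec)  // ml -> mllib

// the same round trip through the abstract Matrix type
val oldMat: DenseMatrix = new DenseMatrix(3, 2, Array(0.0, 0.0, 1.0, 0.0, 2.0, 3.5))
val newMat: newlinalg.Matrix = (oldMat: Matrix).asML   // ml result keeps the dense/sparse shape
val backMat: Matrix = Matrices.fromML(newMat)          // Matrices.fromML dispatches on dense vs. sparse
```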
Diffstat (limited to 'mllib/src/test/scala')
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala  39
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala   30
2 files changed, 68 insertions(+), 1 deletion(-)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
index e289724cda..b7df02e6c0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
@@ -19,12 +19,14 @@ package org.apache.spark.mllib.linalg
import java.util.Random
+import scala.collection.mutable.{Map => MutableMap}
+
import breeze.linalg.{CSCMatrix, Matrix => BM}
import org.mockito.Mockito.when
import org.scalatest.mock.MockitoSugar._
-import scala.collection.mutable.{Map => MutableMap}
import org.apache.spark.SparkFunSuite
+import org.apache.spark.ml.{linalg => newlinalg}
import org.apache.spark.mllib.util.TestingUtils._
class MatricesSuite extends SparkFunSuite {
@@ -523,4 +525,39 @@ class MatricesSuite extends SparkFunSuite {
assert(m.transpose.colIter.toSeq === rows)
}
}
+
+ test("conversions between new local linalg and mllib linalg") {
+ val dm: DenseMatrix = new DenseMatrix(3, 2, Array(0.0, 0.0, 1.0, 0.0, 2.0, 3.5))
+ val sm: SparseMatrix = dm.toSparse
+ val sm0: Matrix = sm.asInstanceOf[Matrix]
+ val dm0: Matrix = dm.asInstanceOf[Matrix]
+
+ def compare(oldM: Matrix, newM: newlinalg.Matrix): Unit = {
+ assert(oldM.toArray === newM.toArray)
+ assert(oldM.numCols === newM.numCols)
+ assert(oldM.numRows === newM.numRows)
+ }
+
+ val newSM: newlinalg.SparseMatrix = sm.asML
+ val newDM: newlinalg.DenseMatrix = dm.asML
+ val newSM0: newlinalg.Matrix = sm0.asML
+ val newDM0: newlinalg.Matrix = dm0.asML
+ assert(newSM0.isInstanceOf[newlinalg.SparseMatrix])
+ assert(newDM0.isInstanceOf[newlinalg.DenseMatrix])
+ compare(sm, newSM)
+ compare(dm, newDM)
+ compare(sm0, newSM0)
+ compare(dm0, newDM0)
+
+ val oldSM: SparseMatrix = SparseMatrix.fromML(newSM)
+ val oldDM: DenseMatrix = DenseMatrix.fromML(newDM)
+ val oldSM0: Matrix = Matrices.fromML(newSM0)
+ val oldDM0: Matrix = Matrices.fromML(newDM0)
+ assert(oldSM0.isInstanceOf[SparseMatrix])
+ assert(oldDM0.isInstanceOf[DenseMatrix])
+ compare(oldSM, newSM)
+ compare(oldDM, newDM)
+ compare(oldSM0, newSM0)
+ compare(oldDM0, newDM0)
+ }
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
index e5567492a2..a7c1a07604 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
@@ -24,6 +24,7 @@ import org.json4s.jackson.JsonMethods.{parse => parseJson}
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.internal.Logging
+import org.apache.spark.ml.{linalg => newlinalg}
import org.apache.spark.mllib.util.TestingUtils._
class VectorsSuite extends SparkFunSuite with Logging {
@@ -392,4 +393,33 @@ class VectorsSuite extends SparkFunSuite with Logging {
assert(u === v, "toJson/fromJson should preserve vector values.")
}
}
+
+ test("conversions between new local linalg and mllib linalg") {
+ val dv: DenseVector = new DenseVector(Array(1.0, 2.0, 3.5))
+ val sv: SparseVector = new SparseVector(5, Array(1, 2, 4), Array(1.1, 2.2, 4.4))
+ val sv0: Vector = sv.asInstanceOf[Vector]
+ val dv0: Vector = dv.asInstanceOf[Vector]
+
+ val newSV: newlinalg.SparseVector = sv.asML
+ val newDV: newlinalg.DenseVector = dv.asML
+ val newSV0: newlinalg.Vector = sv0.asML
+ val newDV0: newlinalg.Vector = dv0.asML
+ assert(newSV0.isInstanceOf[newlinalg.SparseVector])
+ assert(newDV0.isInstanceOf[newlinalg.DenseVector])
+ assert(sv.toArray === newSV.toArray)
+ assert(dv.toArray === newDV.toArray)
+ assert(sv0.toArray === newSV0.toArray)
+ assert(dv0.toArray === newDV0.toArray)
+
+ val oldSV: SparseVector = SparseVector.fromML(newSV)
+ val oldDV: DenseVector = DenseVector.fromML(newDV)
+ val oldSV0: Vector = Vectors.fromML(newSV0)
+ val oldDV0: Vector = Vectors.fromML(newDV0)
+ assert(oldSV0.isInstanceOf[SparseVector])
+ assert(oldDV0.isInstanceOf[DenseVector])
+ assert(oldSV.toArray === newSV.toArray)
+ assert(oldDV.toArray === newDV.toArray)
+ assert(oldSV0.toArray === newSV0.toArray)
+ assert(oldDV0.toArray === newDV0.toArray)
+ }
}