author     Joseph K. Bradley <joseph@databricks.com>    2016-04-21 16:50:09 -0700
committer  DB Tsai <dbt@netflix.com>                    2016-04-21 16:50:09 -0700
commit     f25a3ea8d3ee6972efb925826981918549deacaa (patch)
tree       5365b5f162b41fba8e1786634ccc2c8d585fd47c /mllib/src/main/scala/org/apache
parent     e2b5647ab92eb478b3f7b36a0ce6faf83e24c0e5 (diff)
[SPARK-14734][ML][MLLIB] Added asML, fromML methods for all spark.mllib Vector, Matrix types
## What changes were proposed in this pull request?

For maintaining wrappers around spark.mllib algorithms in spark.ml, it will be useful to have ```private[spark]``` methods for converting from one linear algebra representation to another. This PR adds asML and fromML methods for all spark.mllib Vector and Matrix types.

## How was this patch tested?

Unit tests for all conversions.

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #12504 from jkbradley/linalg-conversions.
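Since the new methods are ```private[spark]```, they are only callable from code compiled as part of Spark itself. Below is a minimal usage sketch, not part of this patch, assuming spark-internal access; the OldVectors/OldMatrices aliases are introduced here purely for readability.

```scala
import org.apache.spark.ml.{linalg => newlinalg}
import org.apache.spark.mllib.linalg.{Matrices => OldMatrices, Vectors => OldVectors}

// spark.mllib -> mllib-local and back; only references are copied, never the data.
val oldVec = OldVectors.dense(1.0, 2.0, 3.0)
val newVec: newlinalg.Vector = oldVec.asML
val backVec = OldVectors.fromML(newVec)

// The same pattern works for matrices.
val oldMat = OldMatrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
val newMat: newlinalg.Matrix = oldMat.asML
val backMat = OldMatrices.fromML(newMat)
```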
Diffstat (limited to 'mllib/src/main/scala/org/apache')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala  35
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala   33
2 files changed, 67 insertions(+), 1 deletion(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 8c09b69b3c..bb5d6d9d51 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -24,7 +24,8 @@ import scala.collection.mutable.{ArrayBuffer, ArrayBuilder => MArrayBuilder, Has
import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM}
import com.github.fommil.netlib.BLAS.{getInstance => blas}
-import org.apache.spark.annotation.{DeveloperApi, Since}
+import org.apache.spark.annotation.Since
+import org.apache.spark.ml.{linalg => newlinalg}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
@@ -158,6 +159,12 @@ sealed trait Matrix extends Serializable {
*/
@Since("1.5.0")
def numActives: Int
+
+ /**
+ * Convert this matrix to the new mllib-local representation.
+ * This does NOT copy the data; it copies references.
+ */
+ private[spark] def asML: newlinalg.Matrix
}
private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
@@ -419,6 +426,10 @@ class DenseMatrix @Since("1.3.0") (
}
}
}
+
+ private[spark] override def asML: newlinalg.DenseMatrix = {
+ new newlinalg.DenseMatrix(numRows, numCols, values, isTransposed)
+ }
}
/**
@@ -515,6 +526,11 @@ object DenseMatrix {
}
matrix
}
+
+ /** Convert new linalg type to spark.mllib type. Light copy; only copies references */
+ private[spark] def fromML(m: newlinalg.DenseMatrix): DenseMatrix = {
+ new DenseMatrix(m.numRows, m.numCols, m.values, m.isTransposed)
+ }
}
/**
@@ -721,6 +737,10 @@ class SparseMatrix @Since("1.3.0") (
}
}
}
+
+ private[spark] override def asML: newlinalg.SparseMatrix = {
+ new newlinalg.SparseMatrix(numRows, numCols, colPtrs, rowIndices, values, isTransposed)
+ }
}
/**
@@ -895,6 +915,11 @@ object SparseMatrix {
SparseMatrix.fromCOO(n, n, nnzVals.map(v => (v._2, v._2, v._1)))
}
}
+
+ /** Convert new linalg type to spark.mllib type. Light copy; only copies references */
+ private[spark] def fromML(m: newlinalg.SparseMatrix): SparseMatrix = {
+ new SparseMatrix(m.numRows, m.numCols, m.colPtrs, m.rowIndices, m.values, m.isTransposed)
+ }
}
/**
@@ -1177,4 +1202,12 @@ object Matrices {
SparseMatrix.fromCOO(numRows, numCols, entries)
}
}
+
+ /** Convert new linalg type to spark.mllib type. Light copy; only copies references */
+ private[spark] def fromML(m: newlinalg.Matrix): Matrix = m match {
+ case dm: newlinalg.DenseMatrix =>
+ DenseMatrix.fromML(dm)
+ case sm: newlinalg.SparseMatrix =>
+ SparseMatrix.fromML(sm)
+ }
}
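The doc comments above stress that asML is a light copy. A small sketch of what that means in practice (illustrative only, assuming spark-internal access): because only the reference to the values array is copied, a mutation of that array is visible through both representations.

```scala
import org.apache.spark.mllib.linalg.DenseMatrix

val values = Array(1.0, 2.0, 3.0, 4.0)
val oldM = new DenseMatrix(2, 2, values)  // column-major storage
val newM = oldM.asML                      // copies the reference to `values`, not the data
values(0) = 42.0
assert(oldM(0, 0) == 42.0)
assert(newM(0, 0) == 42.0)                // same backing array, so the change is visible here too
```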
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 5812cdde2c..5ec83e8d5c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -30,6 +30,7 @@ import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
import org.apache.spark.SparkException
import org.apache.spark.annotation.{AlphaComponent, Since}
+import org.apache.spark.ml.{linalg => newlinalg}
import org.apache.spark.mllib.util.NumericParser
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
@@ -180,6 +181,12 @@ sealed trait Vector extends Serializable {
*/
@Since("1.6.0")
def toJson: String
+
+ /**
+ * Convert this vector to the new mllib-local representation.
+ * This does NOT copy the data; it copies references.
+ */
+ private[spark] def asML: newlinalg.Vector
}
/**
@@ -573,6 +580,14 @@ object Vectors {
/** Max number of nonzero entries used in computing hash code. */
private[linalg] val MAX_HASH_NNZ = 128
+
+ /** Convert new linalg type to spark.mllib type. Light copy; only copies references */
+ private[spark] def fromML(v: newlinalg.Vector): Vector = v match {
+ case dv: newlinalg.DenseVector =>
+ DenseVector.fromML(dv)
+ case sv: newlinalg.SparseVector =>
+ SparseVector.fromML(sv)
+ }
}
/**
@@ -686,6 +701,10 @@ class DenseVector @Since("1.0.0") (
val jValue = ("type" -> 1) ~ ("values" -> values.toSeq)
compact(render(jValue))
}
+
+ private[spark] override def asML: newlinalg.DenseVector = {
+ new newlinalg.DenseVector(values)
+ }
}
@Since("1.3.0")
@@ -694,6 +713,11 @@ object DenseVector {
/** Extracts the value array from a dense vector. */
@Since("1.3.0")
def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
+
+ /** Convert new linalg type to spark.mllib type. Light copy; only copies references */
+ private[spark] def fromML(v: newlinalg.DenseVector): DenseVector = {
+ new DenseVector(v.values)
+ }
}
/**
@@ -882,6 +906,10 @@ class SparseVector @Since("1.0.0") (
("values" -> values.toSeq)
compact(render(jValue))
}
+
+ private[spark] override def asML: newlinalg.SparseVector = {
+ new newlinalg.SparseVector(size, indices, values)
+ }
}
@Since("1.3.0")
@@ -889,4 +917,9 @@ object SparseVector {
@Since("1.3.0")
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
Some((sv.size, sv.indices, sv.values))
+
+ /** Convert new linalg type to spark.mllib type. Light copy; only copies references */
+ private[spark] def fromML(v: newlinalg.SparseVector): SparseVector = {
+ new SparseVector(v.size, v.indices, v.values)
+ }
}
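Finally, a round-trip sketch for the sparse case (again illustrative only, assuming spark-internal access); it exercises the pattern-matching dispatch in Vectors.fromML shown above.

```scala
import org.apache.spark.ml.{linalg => newlinalg}
import org.apache.spark.mllib.linalg.{SparseVector, Vectors => OldVectors}

val oldSv = OldVectors.sparse(5, Array(1, 3), Array(0.5, 2.5))  // spark.mllib vector
val newSv: newlinalg.Vector = oldSv.asML                        // shares size, indices, and values
val back = OldVectors.fromML(newSv)                             // dispatches to SparseVector.fromML
assert(back.isInstanceOf[SparseVector])
assert(back == oldSv)
```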