path: root/mllib-local
author     Xiangrui Meng <meng@databricks.com>    2016-04-30 06:30:39 -0700
committer  Xiangrui Meng <meng@databricks.com>    2016-04-30 06:30:39 -0700
commit     0847fe4eb346e3c2060eb0f3680610bec06115e7 (patch)
tree       b64c78629130f667cd6529d0ece0eedf2b0a4d2c /mllib-local
parent     1192fe4cd2a934790dc1ff2d459cf380e67335b2 (diff)
[SPARK-14653][ML] Remove json4s from mllib-local
## What changes were proposed in this pull request?

This PR moves Vector.toJson/fromJson to ml.linalg.VectorEncoder under mllib/ to keep mllib-local's dependencies minimal. The JSON encoding is used by Params, so we still need this feature in SPARK-14615, where we will switch to ml.linalg in spark.ml APIs.

## How was this patch tested?

Copied the existing unit tests over.

cc: dbtsai

Author: Xiangrui Meng <meng@databricks.com>

Closes #12802 from mengxr/SPARK-14653.
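For context on what is being relocated: the removed code below serializes dense vectors as {"type":1,"values":[...]} and sparse vectors as {"type":0,"size":...,"indices":[...],"values":[...]}. The sketch below reconstructs that round-trip with plain json4s, outside of Spark; the object name VectorJsonSketch and its helper methods are illustrative only, not the encoder this PR adds under mllib/.

```scala
// Illustrative only: a self-contained reconstruction of the JSON round-trip removed
// from mllib-local in this diff. It uses the same json4s calls and the same wire format;
// the object and method names are made up for this sketch.
import org.json4s.DefaultFormats
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse, render}

object VectorJsonSketch {
  implicit val formats: DefaultFormats.type = DefaultFormats

  // Mirrors DenseVector.toJson: {"type":1,"values":[...]}.
  def denseToJson(values: Array[Double]): String =
    compact(render(("type" -> 1) ~ ("values" -> values.toSeq)))

  // Mirrors SparseVector.toJson: {"type":0,"size":n,"indices":[...],"values":[...]}.
  def sparseToJson(size: Int, indices: Array[Int], values: Array[Double]): String =
    compact(render(
      ("type" -> 0) ~ ("size" -> size) ~ ("indices" -> indices.toSeq) ~ ("values" -> values.toSeq)))

  // Mirrors Vectors.fromJson: dispatch on the "type" field, returning the decoded pieces
  // instead of Spark's Vector classes so the sketch has no Spark dependency.
  def parseVector(json: String): (Int, Array[Int], Array[Double]) = {
    val jValue = parse(json)
    (jValue \ "type").extract[Int] match {
      case 0 => // sparse
        val size = (jValue \ "size").extract[Int]
        val indices = (jValue \ "indices").extract[Seq[Int]].toArray
        val values = (jValue \ "values").extract[Seq[Double]].toArray
        (size, indices, values)
      case 1 => // dense
        val values = (jValue \ "values").extract[Seq[Double]].toArray
        (values.length, values.indices.toArray, values)
      case other =>
        throw new IllegalArgumentException(s"Cannot parse $json into a vector (type $other).")
    }
  }
}
```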
Diffstat (limited to 'mllib-local')
-rw-r--r--  mllib-local/pom.xml                                                       |  4
-rw-r--r--  mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala       | 41
-rw-r--r--  mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala  | 17
3 files changed, 0 insertions, 62 deletions
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 60b615a07f..68f15dd905 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -49,10 +49,6 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.json4s</groupId>
-      <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>
       <scope>test</scope>
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
index 4275a22ae0..c0d112d2c5 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
@@ -24,9 +24,6 @@ import scala.annotation.varargs
 import scala.collection.JavaConverters._
 
 import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
-import org.json4s.DefaultFormats
-import org.json4s.JsonDSL._
-import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
 
 /**
  * Represents a numeric vector, whose index type is Int and value type is Double.
@@ -153,11 +150,6 @@ sealed trait Vector extends Serializable {
    * Returns -1 if vector has length 0.
    */
   def argmax: Int
-
-  /**
-   * Converts the vector to a JSON string.
-   */
-  def toJson: String
 }
 
 /**
@@ -234,26 +226,6 @@ object Vectors {
   }
 
   /**
-   * Parses the JSON representation of a vector into a [[Vector]].
-   */
-  def fromJson(json: String): Vector = {
-    implicit val formats = DefaultFormats
-    val jValue = parseJson(json)
-    (jValue \ "type").extract[Int] match {
-      case 0 => // sparse
-        val size = (jValue \ "size").extract[Int]
-        val indices = (jValue \ "indices").extract[Seq[Int]].toArray
-        val values = (jValue \ "values").extract[Seq[Double]].toArray
-        sparse(size, indices, values)
-      case 1 => // dense
-        val values = (jValue \ "values").extract[Seq[Double]].toArray
-        dense(values)
-      case _ =>
-        throw new IllegalArgumentException(s"Cannot parse $json into a vector.")
-    }
-  }
-
-  /**
    * Creates a vector instance from a breeze vector.
    */
   private[spark] def fromBreeze(breezeVector: BV[Double]): Vector = {
@@ -541,11 +513,6 @@ class DenseVector (val values: Array[Double]) extends Vector {
       maxIdx
     }
   }
-
-  override def toJson: String = {
-    val jValue = ("type" -> 1) ~ ("values" -> values.toSeq)
-    compact(render(jValue))
-  }
 }
 
 object DenseVector {
@@ -724,14 +691,6 @@ class SparseVector (
     }.unzip
     new SparseVector(selectedIndices.length, sliceInds.toArray, sliceVals.toArray)
   }
-
-  override def toJson: String = {
-    val jValue = ("type" -> 0) ~
-      ("size" -> size) ~
-      ("indices" -> indices.toSeq) ~
-      ("values" -> values.toSeq)
-    compact(render(jValue))
-  }
 }
 
 object SparseVector {
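For reference, the two toJson overrides removed above produce output like the following, shown REPL-style with the illustrative VectorJsonSketch helpers from the sketch near the top of this page (hypothetical names, json4s-jackson on the classpath):

```scala
// Example encodings matching the removed toJson implementations above
// (VectorJsonSketch is the illustrative helper defined earlier, not a Spark class).
val denseJson = VectorJsonSketch.denseToJson(Array(0.0, 2.0))
// {"type":1,"values":[0.0,2.0]}
val sparseJson = VectorJsonSketch.sparseToJson(2, Array(1), Array(2.0))
// {"type":0,"size":2,"indices":[1],"values":[2.0]}
```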
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
index 504be36413..887814b5e7 100644
--- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
@@ -20,7 +20,6 @@ package org.apache.spark.ml.linalg
 import scala.util.Random
 
 import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM}
-import org.json4s.jackson.JsonMethods.{parse => parseJson}
 
 import org.apache.spark.ml.SparkMLFunSuite
 import org.apache.spark.ml.util.TestingUtils._
@@ -339,20 +338,4 @@ class VectorsSuite extends SparkMLFunSuite {
     assert(v.slice(Array(2, 0)) === new SparseVector(2, Array(0), Array(2.2)))
     assert(v.slice(Array(2, 0, 3, 4)) === new SparseVector(4, Array(0, 3), Array(2.2, 4.4)))
   }
-
-  test("toJson/fromJson") {
-    val sv0 = Vectors.sparse(0, Array.empty, Array.empty)
-    val sv1 = Vectors.sparse(1, Array.empty, Array.empty)
-    val sv2 = Vectors.sparse(2, Array(1), Array(2.0))
-    val dv0 = Vectors.dense(Array.empty[Double])
-    val dv1 = Vectors.dense(1.0)
-    val dv2 = Vectors.dense(0.0, 2.0)
-    for (v <- Seq(sv0, sv1, sv2, dv0, dv1, dv2)) {
-      val json = v.toJson
-      parseJson(json) // `json` should be a valid JSON string
-      val u = Vectors.fromJson(json)
-      assert(u.getClass === v.getClass, "toJson/fromJson should preserve vector types.")
-      assert(u === v, "toJson/fromJson should preserve vector values.")
-    }
-  }
 }
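The removed suite above asserted that every encoding parses as valid JSON and that decoding preserves the vector's type and values. A comparable REPL-style check against the illustrative VectorJsonSketch helpers (hypothetical names, not the relocated Spark test) could look like:

```scala
// Hedged round-trip check mirroring the removed "toJson/fromJson" test, written
// against the illustrative VectorJsonSketch helpers rather than Spark's Vector classes.
import org.json4s.jackson.JsonMethods.parse

val encodings = Seq(
  VectorJsonSketch.sparseToJson(0, Array.empty[Int], Array.empty[Double]),
  VectorJsonSketch.sparseToJson(2, Array(1), Array(2.0)),
  VectorJsonSketch.denseToJson(Array.empty[Double]),
  VectorJsonSketch.denseToJson(Array(0.0, 2.0))
)
for (json <- encodings) {
  parse(json) // every encoding must be a valid JSON string, as the removed test asserted
  val (size, indices, values) = VectorJsonSketch.parseVector(json)
  assert(indices.length == values.length && size >= values.length,
    s"round-trip produced an inconsistent vector for $json")
}
```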