diff options
author | Xiangrui Meng <meng@databricks.com> | 2016-04-30 06:30:39 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-04-30 06:30:39 -0700 |
commit | 0847fe4eb346e3c2060eb0f3680610bec06115e7 (patch) | |
tree | b64c78629130f667cd6529d0ece0eedf2b0a4d2c /mllib-local | |
parent | 1192fe4cd2a934790dc1ff2d459cf380e67335b2 (diff) | |
download | spark-0847fe4eb346e3c2060eb0f3680610bec06115e7.tar.gz spark-0847fe4eb346e3c2060eb0f3680610bec06115e7.tar.bz2 spark-0847fe4eb346e3c2060eb0f3680610bec06115e7.zip |
[SPARK-14653][ML] Remove json4s from mllib-local
## What changes were proposed in this pull request?
This PR moves Vector.toJson/fromJson to ml.linalg.VectorEncoder under mllib/ to keep mllib-local's dependency minimal. The JSON encoding is used by Params, so we will still need this feature in SPARK-14615, where we will switch to ml.linalg in spark.ml APIs.
## How was this patch tested?
Copied existing unit tests over.
cc: dbtsai
Author: Xiangrui Meng <meng@databricks.com>
Closes #12802 from mengxr/SPARK-14653.
Diffstat (limited to 'mllib-local')
-rw-r--r-- | mllib-local/pom.xml | 4 | ||||
-rw-r--r-- | mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala | 41 | ||||
-rw-r--r-- | mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala | 17 |
3 files changed, 0 insertions, 62 deletions
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 60b615a07f..68f15dd905 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -49,10 +49,6 @@ <scope>test</scope> </dependency> <dependency> - <groupId>org.json4s</groupId> - <artifactId>json4s-jackson_${scala.binary.version}</artifactId> - </dependency> - <dependency> <groupId>org.mockito</groupId> <artifactId>mockito-core</artifactId> <scope>test</scope> diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 4275a22ae0..c0d112d2c5 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -24,9 +24,6 @@ import scala.annotation.varargs import scala.collection.JavaConverters._ import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV} -import org.json4s.DefaultFormats -import org.json4s.JsonDSL._ -import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render} /** * Represents a numeric vector, whose index type is Int and value type is Double. @@ -153,11 +150,6 @@ sealed trait Vector extends Serializable { * Returns -1 if vector has length 0. */ def argmax: Int - - /** - * Converts the vector to a JSON string. - */ - def toJson: String } /** @@ -234,26 +226,6 @@ object Vectors { } /** - * Parses the JSON representation of a vector into a [[Vector]]. 
- */ - def fromJson(json: String): Vector = { - implicit val formats = DefaultFormats - val jValue = parseJson(json) - (jValue \ "type").extract[Int] match { - case 0 => // sparse - val size = (jValue \ "size").extract[Int] - val indices = (jValue \ "indices").extract[Seq[Int]].toArray - val values = (jValue \ "values").extract[Seq[Double]].toArray - sparse(size, indices, values) - case 1 => // dense - val values = (jValue \ "values").extract[Seq[Double]].toArray - dense(values) - case _ => - throw new IllegalArgumentException(s"Cannot parse $json into a vector.") - } - } - - /** * Creates a vector instance from a breeze vector. */ private[spark] def fromBreeze(breezeVector: BV[Double]): Vector = { @@ -541,11 +513,6 @@ class DenseVector (val values: Array[Double]) extends Vector { maxIdx } } - - override def toJson: String = { - val jValue = ("type" -> 1) ~ ("values" -> values.toSeq) - compact(render(jValue)) - } } object DenseVector { @@ -724,14 +691,6 @@ class SparseVector ( }.unzip new SparseVector(selectedIndices.length, sliceInds.toArray, sliceVals.toArray) } - - override def toJson: String = { - val jValue = ("type" -> 0) ~ - ("size" -> size) ~ - ("indices" -> indices.toSeq) ~ - ("values" -> values.toSeq) - compact(render(jValue)) - } } object SparseVector { diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala index 504be36413..887814b5e7 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.ml.linalg import scala.util.Random import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM} -import org.json4s.jackson.JsonMethods.{parse => parseJson} import org.apache.spark.ml.SparkMLFunSuite import org.apache.spark.ml.util.TestingUtils._ @@ -339,20 +338,4 @@ class VectorsSuite extends 
SparkMLFunSuite { assert(v.slice(Array(2, 0)) === new SparseVector(2, Array(0), Array(2.2))) assert(v.slice(Array(2, 0, 3, 4)) === new SparseVector(4, Array(0, 3), Array(2.2, 4.4))) } - - test("toJson/fromJson") { - val sv0 = Vectors.sparse(0, Array.empty, Array.empty) - val sv1 = Vectors.sparse(1, Array.empty, Array.empty) - val sv2 = Vectors.sparse(2, Array(1), Array(2.0)) - val dv0 = Vectors.dense(Array.empty[Double]) - val dv1 = Vectors.dense(1.0) - val dv2 = Vectors.dense(0.0, 2.0) - for (v <- Seq(sv0, sv1, sv2, dv0, dv1, dv2)) { - val json = v.toJson - parseJson(json) // `json` should be a valid JSON string - val u = Vectors.fromJson(json) - assert(u.getClass === v.getClass, "toJson/fromJson should preserve vector types.") - assert(u === v, "toJson/fromJson should preserve vector values.") - } - } } |