diff options
author | Joseph K. Bradley <joseph@databricks.com> | 2015-05-21 13:05:48 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-05-21 13:05:48 -0700 |
commit | 6d75ed7e5ccf6c58143de4608115f9a2b3ff6cf4 (patch) | |
tree | 5d75b08360d41efbeb804a71f80b859cbb802d0b /mllib | |
parent | 15680aeed425c900a5de34d12b61929d1e5df607 (diff) | |
download | spark-6d75ed7e5ccf6c58143de4608115f9a2b3ff6cf4.tar.gz spark-6d75ed7e5ccf6c58143de4608115f9a2b3ff6cf4.tar.bz2 spark-6d75ed7e5ccf6c58143de4608115f9a2b3ff6cf4.zip |
[SPARK-7585] [ML] [DOC] VectorIndexer user guide section
Added VectorIndexer section to ML user guide. Also added javaCategoryMaps() method and Java unit test for it.
CC: mengxr
Author: Joseph K. Bradley <joseph@databricks.com>
Closes #6255 from jkbradley/vector-indexer-guide and squashes the following commits:
dbb8c4c [Joseph K. Bradley] simplified VectorIndexerModel.javaCategoryMaps
f692084 [Joseph K. Bradley] Added VectorIndexer section to ML user guide. Also added javaCategoryMaps() method and Java unit test for it.
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 10 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java | 4 |
2 files changed, 13 insertions, 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 6d1d0524e5..e238fb310e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -17,6 +17,11 @@ package org.apache.spark.ml.feature +import java.lang.{Double => JDouble, Integer => JInt} +import java.util.{Map => JMap} + +import scala.collection.JavaConverters._ + import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute._ @@ -248,6 +253,11 @@ class VectorIndexerModel private[ml] ( val categoryMaps: Map[Int, Map[Double, Int]]) extends Model[VectorIndexerModel] with VectorIndexerParams { + /** Java-friendly version of [[categoryMaps]] */ + def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = { + categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] + } + /** * Pre-computed feature attributes, with some missing info. * In transform(), set attribute name and other info, if available. 
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java index 161100134c..c7ae5468b9 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java @@ -19,6 +19,7 @@ package org.apache.spark.ml.feature; import java.io.Serializable; import java.util.List; +import java.util.Map; import org.junit.After; import org.junit.Assert; @@ -64,7 +65,8 @@ public class JavaVectorIndexerSuite implements Serializable { .setMaxCategories(2); VectorIndexerModel model = indexer.fit(data); Assert.assertEquals(model.numFeatures(), 2); - Assert.assertEquals(model.categoryMaps().size(), 1); + Map<Integer, Map<Double, Integer>> categoryMaps = model.javaCategoryMaps(); + Assert.assertEquals(categoryMaps.size(), 1); DataFrame indexedData = model.transform(data); } } |