aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test/java
diff options
context:
space:
mode:
author Joseph K. Bradley <joseph@databricks.com> 2015-08-20 15:01:31 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-08-20 15:01:31 -0700
commit eaafe139f881d6105996373c9b11f2ccd91b5b3e (patch)
tree bb935d8fb5b9aca4478db5fbec2ffd13a229d490 /mllib/src/test/java
parent 7cfc0750e14f2c1b3847e4720cc02150253525a9 (diff)
download spark-eaafe139f881d6105996373c9b11f2ccd91b5b3e.tar.gz
spark-eaafe139f881d6105996373c9b11f2ccd91b5b3e.tar.bz2
spark-eaafe139f881d6105996373c9b11f2ccd91b5b3e.zip
[SPARK-9245] [MLLIB] LDA topic assignments
For each (document, term) pair, return the top topic. Note that instances of a (doc, term) pair within a document (a.k.a. "tokens") are exchangeable, so we should provide an estimate per document-term pair, rather than per token.
CC: rotationsymmetry mengxr
Author: Joseph K. Bradley <joseph@databricks.com>
Closes #8329 from jkbradley/lda-topic-assignments.
Diffstat (limited to 'mllib/src/test/java')
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java 7
1 file changed, 7 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
index 6e91cde2ea..3fea359a3b 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
@@ -134,6 +134,13 @@ public class JavaLDASuite implements Serializable {
double[] topicWeights = topTopics._3();
assertEquals(3, topicIndices.length);
assertEquals(3, topicWeights.length);
+
+ // Check: topTopicAssignments
+ Tuple3<Long, int[], int[]> topicAssignment = model.javaTopicAssignments().first();
+ Long docId2 = topicAssignment._1();
+ int[] termIndices2 = topicAssignment._2();
+ int[] topicIndices2 = topicAssignment._3();
+ assertEquals(termIndices2.length, topicIndices2.length);
}
@Test