diff options
author | Joseph K. Bradley <joseph@databricks.com> | 2015-08-20 15:01:31 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-08-20 15:01:31 -0700 |
commit | eaafe139f881d6105996373c9b11f2ccd91b5b3e (patch) | |
tree | bb935d8fb5b9aca4478db5fbec2ffd13a229d490 /mllib/src/test/java/org | |
parent | 7cfc0750e14f2c1b3847e4720cc02150253525a9 (diff) | |
download | spark-eaafe139f881d6105996373c9b11f2ccd91b5b3e.tar.gz spark-eaafe139f881d6105996373c9b11f2ccd91b5b3e.tar.bz2 spark-eaafe139f881d6105996373c9b11f2ccd91b5b3e.zip |
[SPARK-9245] [MLLIB] LDA topic assignments
For each (document, term) pair, return the top topic. Note that instances of a (doc, term) pair within a document (a.k.a. "tokens") are exchangeable, so we should provide an estimate per document-term pair rather than per token.
CC: rotationsymmetry mengxr
Author: Joseph K. Bradley <joseph@databricks.com>
Closes #8329 from jkbradley/lda-topic-assignments.
Diffstat (limited to 'mllib/src/test/java/org')
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index 6e91cde2ea..3fea359a3b 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -134,6 +134,13 @@ public class JavaLDASuite implements Serializable { double[] topicWeights = topTopics._3(); assertEquals(3, topicIndices.length); assertEquals(3, topicWeights.length); + + // Check: topTopicAssignments + Tuple3<Long, int[], int[]> topicAssignment = model.javaTopicAssignments().first(); + Long docId2 = topicAssignment._1(); + int[] termIndices2 = topicAssignment._2(); + int[] topicIndices2 = topicAssignment._3(); + assertEquals(termIndices2.length, topicIndices2.length); } @Test |