aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test
diff options
context:
space:
mode:
author Xiangrui Meng <meng@databricks.com> 2015-08-02 11:50:17 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-08-02 11:50:17 -0700
commit 66924ffa6bdb8e0df1b90b789cb7ad443377e729 (patch)
tree 1ba157fd78b99c18d7773c52d389cd02bed858b3 /mllib/src/test
parent 8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01 (diff)
download spark-66924ffa6bdb8e0df1b90b789cb7ad443377e729.tar.gz
spark-66924ffa6bdb8e0df1b90b789cb7ad443377e729.tar.bz2
spark-66924ffa6bdb8e0df1b90b789cb7ad443377e729.zip
[SPARK-9527] [MLLIB] add PrefixSpanModel and make PrefixSpan Java friendly
1. Use `PrefixSpanModel` to wrap the frequent sequences. 2. Define `FreqSequence` to wrap each frequent sequence, which contains a Java-friendly method `javaSequence`. 3. Overload `run` for Java users. 4. Add a unit test in Java to check Java compatibility. zhangjiajin feynmanliang Author: Xiangrui Meng <meng@databricks.com> Closes #7869 from mengxr/SPARK-9527 and squashes the following commits: 4345594 [Xiangrui Meng] add PrefixSpanModel and make PrefixSpan Java friendly
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java 67
-rw-r--r-- mllib/src/test/scala/org/apache/spark/mllib/fpm/PrefixSpanSuite.scala 8
2 files changed, 71 insertions, 4 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java
new file mode 100644
index 0000000000..34daf5fbde
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.fpm;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.fpm.PrefixSpan.FreqSequence;
+
+/** Checks that PrefixSpan can be driven from Java and that its results can be read back. */
+public class JavaPrefixSpanSuite {
+  private transient JavaSparkContext sc;
+
+  @Before
+  public void setUp() {
+    sc = new JavaSparkContext("local", "JavaPrefixSpan");
+  }
+
+  @After
+  public void tearDown() {
+    if (sc != null) {  // @After also runs when setUp() threw, leaving sc unset
+      sc.stop();
+    }
+    sc = null;
+  }
+
+  @Test
+  public void runPrefixSpan() {
+    JavaRDD<List<List<Integer>>> sequences = sc.parallelize(Arrays.asList(
+      Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3)),
+      Arrays.asList(Arrays.asList(1), Arrays.asList(3, 2), Arrays.asList(1, 2)),
+      Arrays.asList(Arrays.asList(1, 2), Arrays.asList(5)),
+      Arrays.asList(Arrays.asList(6))
+    ), 2);
+    PrefixSpan prefixSpan = new PrefixSpan().setMinSupport(0.5).setMaxPatternLength(5);
+    PrefixSpanModel<Integer> model = prefixSpan.run(sequences);
+    List<FreqSequence<Integer>> localFreqSeqs = model.freqSequences().toJavaRDD().collect();
+    Assert.assertEquals(5, localFreqSeqs.size());
+    // Every pattern must convert to a non-empty Java list and carry a positive count.
+    for (FreqSequence<Integer> freqSeq : localFreqSeqs) {
+      Assert.assertFalse(freqSeq.javaSequence().isEmpty());
+      Assert.assertTrue(freqSeq.freq() > 0L);
+    }
+  }
+}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/fpm/PrefixSpanSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/fpm/PrefixSpanSuite.scala
index d87f61e385..0ae48d62cc 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/fpm/PrefixSpanSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/fpm/PrefixSpanSuite.scala
@@ -296,7 +296,7 @@ class PrefixSpanSuite extends SparkFunSuite with MLlibTestSparkContext {
5 <{1,2}> 0.75
*/
- val result = prefixspan.run(rdd)
+ val model = prefixspan.run(rdd)
val expected = Array(
(Array(Array(1)), 3L),
(Array(Array(2)), 3L),
@@ -304,7 +304,7 @@ class PrefixSpanSuite extends SparkFunSuite with MLlibTestSparkContext {
(Array(Array(1), Array(3)), 2L),
(Array(Array(1, 2)), 3L)
)
- compareResults(expected, result.collect())
+ compareResults(expected, model.freqSequences.collect().map(x => (x.sequence, x.freq)))
}
test("PrefixSpan String type, variable-size itemsets") {
@@ -322,7 +322,7 @@ class PrefixSpanSuite extends SparkFunSuite with MLlibTestSparkContext {
.setMinSupport(0.5)
.setMaxPatternLength(5)
- val result = prefixspan.run(rdd)
+ val model = prefixspan.run(rdd)
val expected = Array(
(Array(Array(1)), 3L),
(Array(Array(2)), 3L),
@@ -332,7 +332,7 @@ class PrefixSpanSuite extends SparkFunSuite with MLlibTestSparkContext {
).map { case (pattern, count) =>
(pattern.map(itemSet => itemSet.map(intToString)), count)
}
- compareResults(expected, result.collect())
+ compareResults(expected, model.freqSequences.collect().map(x => (x.sequence, x.freq)))
}
private def compareResults[Item](