diff options
author | Xiangrui Meng <meng@databricks.com> | 2015-08-02 11:50:17 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-08-02 11:50:17 -0700 |
commit | 66924ffa6bdb8e0df1b90b789cb7ad443377e729 (patch) | |
tree | 1ba157fd78b99c18d7773c52d389cd02bed858b3 /mllib/src/test/java/org/apache | |
parent | 8eafa2aeb6c1b465cfdb99f04c2137fc3eac0c01 (diff) | |
download | spark-66924ffa6bdb8e0df1b90b789cb7ad443377e729.tar.gz spark-66924ffa6bdb8e0df1b90b789cb7ad443377e729.tar.bz2 spark-66924ffa6bdb8e0df1b90b789cb7ad443377e729.zip |
[SPARK-9527] [MLLIB] add PrefixSpanModel and make PrefixSpan Java friendly
1. Use `PrefixSpanModel` to wrap the frequent sequences.
2. Define `FreqSequence` to wrap each frequent sequence; it provides a Java-friendly accessor, `javaSequence`.
3. Overload `run` for Java users.
4. Add a unit test in Java to check Java compatibility.
zhangjiajin feynmanliang
Author: Xiangrui Meng <meng@databricks.com>
Closes #7869 from mengxr/SPARK-9527 and squashes the following commits:
4345594 [Xiangrui Meng] add PrefixSpanModel and make PrefixSpan Java friendly
Diffstat (limited to 'mllib/src/test/java/org/apache')
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java new file mode 100644 index 0000000000..34daf5fbde --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaPrefixSpanSuite.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.fpm; + +import java.util.Arrays; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.fpm.PrefixSpan.FreqSequence; + +public class JavaPrefixSpanSuite { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaPrefixSpan"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + } + + @Test + public void runPrefixSpan() { + JavaRDD<List<List<Integer>>> sequences = sc.parallelize(Arrays.asList( + Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3)), + Arrays.asList(Arrays.asList(1), Arrays.asList(3, 2), Arrays.asList(1, 2)), + Arrays.asList(Arrays.asList(1, 2), Arrays.asList(5)), + Arrays.asList(Arrays.asList(6)) + ), 2); + PrefixSpan prefixSpan = new PrefixSpan() + .setMinSupport(0.5) + .setMaxPatternLength(5); + PrefixSpanModel<Integer> model = prefixSpan.run(sequences); + JavaRDD<FreqSequence<Integer>> freqSeqs = model.freqSequences().toJavaRDD(); + List<FreqSequence<Integer>> localFreqSeqs = freqSeqs.collect(); + Assert.assertEquals(5, localFreqSeqs.size()); + // Check that each frequent sequence could be materialized. + for (PrefixSpan.FreqSequence<Integer> freqSeq: localFreqSeqs) { + List<List<Integer>> seq = freqSeq.javaSequence(); + long freq = freqSeq.freq(); + } + } +} |