diff options
author | Jacky Li <jacky.likun@huawei.com> | 2015-02-03 17:02:42 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-02-03 17:02:42 -0800 |
commit | e380d2d46c92b319eafe30974ac7c1509081fca4 (patch) | |
tree | 687991ded2c9e13324585d8160397fc89ca75478 /mllib/src/test/java | |
parent | 068c0e2ee05ee8b133c2dc26b8fa094ab2712d45 (diff) | |
download | spark-e380d2d46c92b319eafe30974ac7c1509081fca4.tar.gz spark-e380d2d46c92b319eafe30974ac7c1509081fca4.tar.bz2 spark-e380d2d46c92b319eafe30974ac7c1509081fca4.zip |
[SPARK-5520][MLlib] Make FP-Growth implementation take generic item types (WIP)
Make FPGrowth.run API take generic item types:
`def run[Item: ClassTag, Basket <: Iterable[Item]](data: RDD[Basket]): FPGrowthModel[Item]`
so that users can invoke it as run[String, Seq[String]], run[Int, Seq[Int]], run[Int, List[Int]], etc.
The Scala part is done, while the Java part is still in progress.
Author: Jacky Li <jacky.likun@huawei.com>
Author: Jacky Li <jackylk@users.noreply.github.com>
Author: Xiangrui Meng <meng@databricks.com>
Closes #4340 from jackylk/SPARK-5520-WIP and squashes the following commits:
f5acf84 [Jacky Li] Merge pull request #2 from mengxr/SPARK-5520
63073d0 [Xiangrui Meng] update to make generic FPGrowth Java-friendly
737d8bb [Jacky Li] fix scalastyle
793f85c [Jacky Li] add Java test case
7783351 [Jacky Li] add generic support in FPGrowth
Diffstat (limited to 'mllib/src/test/java')
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java new file mode 100644 index 0000000000..851707c8a1 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.fpm; + +import java.io.Serializable; +import java.util.ArrayList; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import com.google.common.collect.Lists; +import static org.junit.Assert.*; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +public class JavaFPGrowthSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaFPGrowth"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + } + + @Test + public void runFPGrowth() { + + @SuppressWarnings("unchecked") + JavaRDD<ArrayList<String>> rdd = sc.parallelize(Lists.newArrayList( + Lists.newArrayList("r z h k p".split(" ")), + Lists.newArrayList("z y x w v u t s".split(" ")), + Lists.newArrayList("s x o n r".split(" ")), + Lists.newArrayList("x z y m t s q e".split(" ")), + Lists.newArrayList("z".split(" ")), + Lists.newArrayList("x z y r q t p".split(" "))), 2); + + FPGrowth fpg = new FPGrowth(); + + FPGrowthModel<String> model6 = fpg + .setMinSupport(0.9) + .setNumPartitions(1) + .run(rdd); + assertEquals(0, model6.javaFreqItemsets().count()); + + FPGrowthModel<String> model3 = fpg + .setMinSupport(0.5) + .setNumPartitions(2) + .run(rdd); + assertEquals(18, model3.javaFreqItemsets().count()); + + FPGrowthModel<String> model2 = fpg + .setMinSupport(0.3) + .setNumPartitions(4) + .run(rdd); + assertEquals(54, model2.javaFreqItemsets().count()); + + FPGrowthModel<String> model1 = fpg + .setMinSupport(0.1) + .setNumPartitions(8) + .run(rdd); + assertEquals(625, model1.javaFreqItemsets().count()); + } +} |