aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test/java
diff options
context:
space:
mode:
author: Jacky Li <jacky.likun@huawei.com> 2015-02-03 17:02:42 -0800
committer: Xiangrui Meng <meng@databricks.com> 2015-02-03 17:02:42 -0800
commit: e380d2d46c92b319eafe30974ac7c1509081fca4 (patch)
tree: 687991ded2c9e13324585d8160397fc89ca75478 /mllib/src/test/java
parent: 068c0e2ee05ee8b133c2dc26b8fa094ab2712d45 (diff)
download: spark-e380d2d46c92b319eafe30974ac7c1509081fca4.tar.gz
spark-e380d2d46c92b319eafe30974ac7c1509081fca4.tar.bz2
spark-e380d2d46c92b319eafe30974ac7c1509081fca4.zip
[SPARK-5520][MLlib] Make FP-Growth implementation take generic item types (WIP)
Make the FPGrowth.run API take generic item types: `def run[Item: ClassTag, Basket <: Iterable[Item]](data: RDD[Basket]): FPGrowthModel[Item]`, so that users can invoke it as run[String, Seq[String]], run[Int, Seq[Int]], run[Int, List[Int]], etc. The Scala part is done, while the Java part is still in progress. Author: Jacky Li <jacky.likun@huawei.com> Author: Jacky Li <jackylk@users.noreply.github.com> Author: Xiangrui Meng <meng@databricks.com> Closes #4340 from jackylk/SPARK-5520-WIP and squashes the following commits: f5acf84 [Jacky Li] Merge pull request #2 from mengxr/SPARK-5520 63073d0 [Xiangrui Meng] update to make generic FPGrowth Java-friendly 737d8bb [Jacky Li] fix scalastyle 793f85c [Jacky Li] add Java test case 7783351 [Jacky Li] add generic support in FPGrowth
Diffstat (limited to 'mllib/src/test/java')
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java | 84
1 files changed, 84 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
new file mode 100644
index 0000000000..851707c8a1
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.fpm;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import com.google.common.collect.Lists;
+import static org.junit.Assert.*;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+/** Java-API test suite for FPGrowth run on baskets of String items. */
+public class JavaFPGrowthSuite implements Serializable {
+  private transient JavaSparkContext sc;
+
+  /** Creates a local Spark context before each test. */
+  @Before
+  public void setUp() {
+    sc = new JavaSparkContext("local", "JavaFPGrowth");
+  }
+
+  /** Stops the Spark context and clears the reference after each test. */
+  @After
+  public void tearDown() {
+    sc.stop();
+    sc = null;
+  }
+
+  /** Mines six fixed baskets at four support levels and checks the itemset counts. */
+  @Test
+  public void runFPGrowth() {
+    @SuppressWarnings("unchecked")
+    ArrayList<ArrayList<String>> baskets = Lists.newArrayList(
+      Lists.newArrayList("r z h k p".split(" ")),
+      Lists.newArrayList("z y x w v u t s".split(" ")),
+      Lists.newArrayList("s x o n r".split(" ")),
+      Lists.newArrayList("x z y m t s q e".split(" ")),
+      Lists.newArrayList("z".split(" ")),
+      Lists.newArrayList("x z y r q t p".split(" ")));
+
+    JavaRDD<ArrayList<String>> transactions = sc.parallelize(baskets, 2);
+    FPGrowth miner = new FPGrowth();
+
+    // Minimum support 0.9, 1 partition: expect no frequent itemsets.
+    FPGrowthModel<String> atSupport90 =
+      miner.setMinSupport(0.9).setNumPartitions(1).run(transactions);
+    assertEquals(0, atSupport90.javaFreqItemsets().count());
+
+    // Minimum support 0.5, 2 partitions: expect 18 frequent itemsets.
+    FPGrowthModel<String> atSupport50 =
+      miner.setMinSupport(0.5).setNumPartitions(2).run(transactions);
+    assertEquals(18, atSupport50.javaFreqItemsets().count());
+
+    // Minimum support 0.3, 4 partitions: expect 54 frequent itemsets.
+    FPGrowthModel<String> atSupport30 =
+      miner.setMinSupport(0.3).setNumPartitions(4).run(transactions);
+    assertEquals(54, atSupport30.javaFreqItemsets().count());
+
+    // Minimum support 0.1, 8 partitions: expect 625 frequent itemsets.
+    FPGrowthModel<String> atSupport10 =
+      miner.setMinSupport(0.1).setNumPartitions(8).run(transactions);
+    assertEquals(625, atSupport10.javaFreqItemsets().count());
+  }
+}