diff options
author | Feynman Liang <fliang@databricks.com> | 2015-07-07 11:34:30 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-07-07 11:34:30 -0700 |
commit | 3336c7b148ad543d1f9b64ca2b559ea04930f5be (patch) | |
tree | 5834336b9b0f9db2b26e236fa3fa2713e001140a /mllib/src/test/java/org | |
parent | 70beb808e13f6371968ac87f7cf625ed110375e6 (diff) | |
download | spark-3336c7b148ad543d1f9b64ca2b559ea04930f5be.tar.gz spark-3336c7b148ad543d1f9b64ca2b559ea04930f5be.tar.bz2 spark-3336c7b148ad543d1f9b64ca2b559ea04930f5be.zip |
[SPARK-8559] [MLLIB] Support Association Rule Generation
Distributed generation of single-consequent association rules from an RDD of frequent itemsets. Tests are referenced against `R`'s implementation of Apriori in [arules](http://cran.r-project.org/web/packages/arules/index.html).
Author: Feynman Liang <fliang@databricks.com>
Closes #7005 from feynmanliang/fp-association-rules-distributed and squashes the following commits:
466ced0 [Feynman Liang] Refactor AR generation impl
73c1cff [Feynman Liang] Make rule attributes public, remove numTransactions from FreqItemset
80f63ff [Feynman Liang] Change default confidence and optimize imports
04cf5b5 [Feynman Liang] Code review with @mengxr, add R to tests
0cc1a6a [Feynman Liang] Java compatibility test
f3c14b5 [Feynman Liang] Fix MiMa test
764375e [Feynman Liang] Fix tests
1187307 [Feynman Liang] Almost working tests
b20779b [Feynman Liang] Working implementation
5395c4e [Feynman Liang] Fix imports
2d34405 [Feynman Liang] Partial implementation of distributed ar
83ace4b [Feynman Liang] Local rule generation without pruning complete
69c2c87 [Feynman Liang] Working local implementation, now to parallelize../..
4e1ec9a [Feynman Liang] Pull FreqItemsets out, refactor type param, tests
69ccedc [Feynman Liang] First implementation of association rule generation
Diffstat (limited to 'mllib/src/test/java/org')
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java | 58 | ||||
-rw-r--r-- | mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java | 5 |
2 files changed, 60 insertions, 3 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java new file mode 100644 index 0000000000..b3815ae603 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.mllib.fpm; + +import java.io.Serializable; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset; + + +public class JavaAssociationRulesSuite implements Serializable { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaFPGrowth"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + } + + @Test + public void runAssociationRules() { + + @SuppressWarnings("unchecked") + JavaRDD<FPGrowth.FreqItemset<String>> freqItemsets = sc.parallelize(Lists.newArrayList( + new FreqItemset<String>(new String[] {"a"}, 15L), + new FreqItemset<String>(new String[] {"b"}, 35L), + new FreqItemset<String>(new String[] {"a", "b"}, 18L) + )); + + JavaRDD<AssociationRules.Rule<String>> results = (new AssociationRules()).run(freqItemsets); + } +} + diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java index bd0edf2b9e..9ce2c52dca 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java @@ -29,7 +29,6 @@ import static org.junit.Assert.*; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset; public class JavaFPGrowthSuite implements Serializable { private transient JavaSparkContext sc; @@ -62,10 +61,10 @@ public class JavaFPGrowthSuite implements Serializable { .setNumPartitions(2) .run(rdd); - List<FreqItemset<String>> freqItemsets = model.freqItemsets().toJavaRDD().collect(); + List<FPGrowth.FreqItemset<String>> freqItemsets = model.freqItemsets().toJavaRDD().collect(); 
assertEquals(18, freqItemsets.size()); - for (FreqItemset<String> itemset: freqItemsets) { + for (FPGrowth.FreqItemset<String> itemset: freqItemsets) { // Test return types. List<String> items = itemset.javaItems(); long freq = itemset.freq(); |