aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test/java/org
diff options
context:
space:
mode:
authorFeynman Liang <fliang@databricks.com>2015-07-07 11:34:30 -0700
committerXiangrui Meng <meng@databricks.com>2015-07-07 11:34:30 -0700
commit3336c7b148ad543d1f9b64ca2b559ea04930f5be (patch)
tree5834336b9b0f9db2b26e236fa3fa2713e001140a /mllib/src/test/java/org
parent70beb808e13f6371968ac87f7cf625ed110375e6 (diff)
downloadspark-3336c7b148ad543d1f9b64ca2b559ea04930f5be.tar.gz
spark-3336c7b148ad543d1f9b64ca2b559ea04930f5be.tar.bz2
spark-3336c7b148ad543d1f9b64ca2b559ea04930f5be.zip
[SPARK-8559] [MLLIB] Support Association Rule Generation
Distributed generation of single-consequent association rules from an RDD of frequent itemsets. Tests referenced against `R`'s implementation of Apriori in [arules](http://cran.r-project.org/web/packages/arules/index.html). Author: Feynman Liang <fliang@databricks.com> Closes #7005 from feynmanliang/fp-association-rules-distributed and squashes the following commits: 466ced0 [Feynman Liang] Refactor AR generation impl 73c1cff [Feynman Liang] Make rule attributes public, remove numTransactions from FreqItemset 80f63ff [Feynman Liang] Change default confidence and optimize imports 04cf5b5 [Feynman Liang] Code review with @mengxr, add R to tests 0cc1a6a [Feynman Liang] Java compatibility test f3c14b5 [Feynman Liang] Fix MiMa test 764375e [Feynman Liang] Fix tests 1187307 [Feynman Liang] Almost working tests b20779b [Feynman Liang] Working implementation 5395c4e [Feynman Liang] Fix imports 2d34405 [Feynman Liang] Partial implementation of distributed ar 83ace4b [Feynman Liang] Local rule generation without pruning complete 69c2c87 [Feynman Liang] Working local implementation, now to parallelize../.. 4e1ec9a [Feynman Liang] Pull FreqItemsets out, refactor type param, tests 69ccedc [Feynman Liang] First implementation of association rule generation
Diffstat (limited to 'mllib/src/test/java/org')
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java58
-rw-r--r--mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java5
2 files changed, 60 insertions, 3 deletions
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java
new file mode 100644
index 0000000000..b3815ae603
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.mllib.fpm;
+
+import java.io.Serializable;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import com.google.common.collect.Lists;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset;
+
+
+/**
+ * Java API compatibility test for {@link AssociationRules}: checks that rule generation can be
+ * invoked from Java on a {@code JavaRDD} of {@link FreqItemset}s and that the result is typed as
+ * a {@code JavaRDD} of {@link AssociationRules.Rule}s.
+ */
+public class JavaAssociationRulesSuite implements Serializable {
+  private transient JavaSparkContext sc;
+
+  /** Creates a new local Spark context before each test. */
+  @Before
+  public void setUp() {
+    sc = new JavaSparkContext("local", "JavaAssociationRules");
+  }
+
+  /** Stops the Spark context after each test and drops the reference to it. */
+  @After
+  public void tearDown() {
+    // JUnit still runs @After when @Before threw; in that case sc was never assigned.
+    if (sc != null) {
+      sc.stop();
+      sc = null;
+    }
+  }
+
+  /**
+   * Builds a small RDD of frequent itemsets and passes it to {@link AssociationRules#run}.
+   * The returned rules are not inspected here; the test only exercises the Java-facing call
+   * and its return type.
+   */
+  @Test
+  public void runAssociationRules() {
+
+    // The frequency of {a, b} must not exceed that of its subsets {a} (15) and {b} (35);
+    // otherwise the fixture describes an impossible dataset (confidence of a => b above 1).
+    @SuppressWarnings("unchecked")
+    JavaRDD<FPGrowth.FreqItemset<String>> freqItemsets = sc.parallelize(Lists.newArrayList(
+      new FreqItemset<String>(new String[] {"a"}, 15L),
+      new FreqItemset<String>(new String[] {"b"}, 35L),
+      new FreqItemset<String>(new String[] {"a", "b"}, 12L)
+    ));
+
+    JavaRDD<AssociationRules.Rule<String>> results = (new AssociationRules()).run(freqItemsets);
+  }
+}
+
diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
index bd0edf2b9e..9ce2c52dca 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaFPGrowthSuite.java
@@ -29,7 +29,6 @@ import static org.junit.Assert.*;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset;
public class JavaFPGrowthSuite implements Serializable {
private transient JavaSparkContext sc;
@@ -62,10 +61,10 @@ public class JavaFPGrowthSuite implements Serializable {
.setNumPartitions(2)
.run(rdd);
- List<FreqItemset<String>> freqItemsets = model.freqItemsets().toJavaRDD().collect();
+ List<FPGrowth.FreqItemset<String>> freqItemsets = model.freqItemsets().toJavaRDD().collect();
assertEquals(18, freqItemsets.size());
- for (FreqItemset<String> itemset: freqItemsets) {
+ for (FPGrowth.FreqItemset<String> itemset: freqItemsets) {
// Test return types.
List<String> items = itemset.javaItems();
long freq = itemset.freq();