aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Xiangrui Meng <meng@databricks.com> 2015-07-06 16:11:22 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-07-06 16:11:22 -0700
commit 96c5eeec3970e8b1ebc6ddf5c97a7acc47f539dc (patch)
tree e1c49b01584cc946679625a7a749803a62e81181 /python
parent 1165b17d24cdf1dbebb2faca14308dfe5c2a652c (diff)
download spark-96c5eeec3970e8b1ebc6ddf5c97a7acc47f539dc.tar.gz
spark-96c5eeec3970e8b1ebc6ddf5c97a7acc47f539dc.tar.bz2
spark-96c5eeec3970e8b1ebc6ddf5c97a7acc47f539dc.zip
Revert "[SPARK-7212] [MLLIB] Add sequence learning flag"
This reverts commit 25f574eb9a3cb9b93b7d9194a8ec16e00ce2c036. After speaking to some users and developers, we realized that FP-growth doesn't meet the requirement for frequent sequence mining. PrefixSpan (SPARK-6487) would be the correct algorithm for it. feynmanliang

Author: Xiangrui Meng <meng@databricks.com>

Closes #7240 from mengxr/SPARK-7212.revert and squashes the following commits:

2b3d66b [Xiangrui Meng] Revert "[SPARK-7212] [MLLIB] Add sequence learning flag"
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/mllib/fpm.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py
index b7f00d6006..bdc4a132b1 100644
--- a/python/pyspark/mllib/fpm.py
+++ b/python/pyspark/mllib/fpm.py
@@ -39,8 +39,8 @@ class FPGrowthModel(JavaModelWrapper):
>>> data = [["a", "b", "c"], ["a", "b", "d", "e"], ["a", "c", "e"], ["a", "c", "f"]]
>>> rdd = sc.parallelize(data, 2)
>>> model = FPGrowth.train(rdd, 0.6, 2)
- >>> sorted(model.freqItemsets().collect(), key=lambda x: x.items)
- [FreqItemset(items=[u'a'], freq=4), FreqItemset(items=[u'a', u'c'], freq=3), ...
+ >>> sorted(model.freqItemsets().collect())
+ [FreqItemset(items=[u'a'], freq=4), FreqItemset(items=[u'c'], freq=3), ...
"""
def freqItemsets(self):