aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
author: Liang-Chi Hsieh <viirya@appier.com>, 2015-12-14 09:59:42 -0800
committer: Davies Liu <davies.liu@gmail.com>, 2015-12-14 09:59:42 -0800
commit: b51a4cdff3a7e640a8a66f7a9c17021f3056fd34 (patch)
tree: 8b550b38428e02e6913e437f3f04e8f45a775149 /python/pyspark
parent: e25f1fe42747be71c6b6e6357ca214f9544e3a46 (diff)
download: spark-b51a4cdff3a7e640a8a66f7a9c17021f3056fd34.tar.gz
spark-b51a4cdff3a7e640a8a66f7a9c17021f3056fd34.tar.bz2
spark-b51a4cdff3a7e640a8a66f7a9c17021f3056fd34.zip
[SPARK-12016] [MLLIB] [PYSPARK] Wrap Word2VecModel when loading it in pyspark
JIRA: https://issues.apache.org/jira/browse/SPARK-12016

We should not directly use Word2VecModel in pyspark. We need to wrap it in a Word2VecModelWrapper when loading it in pyspark.

Author: Liang-Chi Hsieh <viirya@appier.com>

Closes #10100 from viirya/fix-load-py-wordvecmodel.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- python/pyspark/mllib/feature.py | 6
1 files changed, 5 insertions, 1 deletions
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 7b077b058c..7254679ebb 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -504,7 +504,8 @@ class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader):
"""
jmodel = sc._jvm.org.apache.spark.mllib.feature \
.Word2VecModel.load(sc._jsc.sc(), path)
- return Word2VecModel(jmodel)
+ model = sc._jvm.Word2VecModelWrapper(jmodel)
+ return Word2VecModel(model)
@ignore_unicode_prefix
@@ -546,6 +547,9 @@ class Word2Vec(object):
>>> sameModel = Word2VecModel.load(sc, path)
>>> model.transform("a") == sameModel.transform("a")
True
+ >>> syms = sameModel.findSynonyms("a", 2)
+ >>> [s[0] for s in syms]
+ [u'b', u'c']
>>> from shutil import rmtree
>>> try:
... rmtree(path)