From ef1047fca789e5470b7b12974f0435d6d1c4f2d5 Mon Sep 17 00:00:00 2001 From: Yong Gang Cao Date: Mon, 22 Feb 2016 09:47:36 +0000 Subject: [SPARK-12153][SPARK-7617][MLLIB] add support of arbitrary length sentence and other tuning for Word2Vec Add support for arbitrary-length sentences by using the natural representation of sentences in the input. Add new similarity functions and a normalization option for distances in synonym finding. Add new accessors for internal structures (the vocabulary and word index) for convenience. Need instructions on how to set the value of the Since annotation for newly added public functions: 1.5.3? JIRA link: https://issues.apache.org/jira/browse/SPARK-12153 Author: Yong Gang Cao Author: Yong-Gang Cao Closes #10152 from ygcao/improvementForSentenceBoundary. --- python/pyspark/ml/feature.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'python/pyspark/ml/feature.py') diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index d017a23188..464c9446f2 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1836,12 +1836,12 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has +----+--------------------+ ... >>> model.findSynonyms("a", 2).show() - +----+--------------------+ - |word| similarity| - +----+--------------------+ - | b| 0.16782984556103436| - | c|-0.46761559092107646| - +----+--------------------+ + +----+-------------------+ + |word| similarity| + +----+-------------------+ + | b| 0.2505344027513247| + | c|-0.6980510075367647| + +----+-------------------+ ... >>> model.transform(doc).head().model DenseVector([0.5524, -0.4995, -0.3599, 0.0241, 0.3461]) -- cgit v1.2.3