diff options
Diffstat (limited to 'docs/mllib-feature-extraction.md')
-rw-r--r-- | docs/mllib-feature-extraction.md | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md index 886d71df47..197bc77d50 100644 --- a/docs/mllib-feature-extraction.md +++ b/docs/mllib-feature-extraction.md @@ -203,6 +203,23 @@ for((synonym, cosineSimilarity) <- synonyms) { } {% endhighlight %} </div> +<div data-lang="python"> +{% highlight python %} +from pyspark import SparkContext +from pyspark.mllib.feature import Word2Vec + +sc = SparkContext(appName='Word2Vec') +inp = sc.textFile("text8_lines").map(lambda row: row.split(" ")) + +word2vec = Word2Vec() +model = word2vec.fit(inp) + +synonyms = model.findSynonyms('china', 40) + +for word, cosine_distance in synonyms: + print "{}: {}".format(word, cosine_distance) +{% endhighlight %} +</div> </div> ## StandardScaler |