aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: Jason Lee <cjlee@us.ibm.com> 2016-04-18 12:47:14 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2016-04-18 12:47:14 -0700
commit: 3d66a2ce9bfc19096e07181f9e970372d32bbc0b (patch)
tree: d2e5205d84bd63a764801ff106f098897e507c41 /mllib
parent: d280d1da1aec925687a0bfb496f3a6e0979e896f (diff)
download: spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.gz
spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.bz2
spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.zip
[SPARK-14564][ML][MLLIB][PYSPARK] Python Word2Vec missing setWindowSize method
## What changes were proposed in this pull request?

Added windowSize getter/setter to ML/MLlib

## How was this patch tested?

Added test cases in tests.py under both ML and MLlib

Author: Jason Lee <cjlee@us.ibm.com>

Closes #12428 from jasoncl/SPARK-14564.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 5
1 files changed, 4 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 1a58779055..32dc16de08 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -671,6 +671,7 @@ private[python] class PythonMLLibAPI extends Serializable {
* @param numPartitions number of partitions
* @param numIterations number of iterations
* @param seed initial seed for random generator
+ * @param windowSize size of window
* @return A handle to java Word2VecModelWrapper instance at python side
*/
def trainWord2VecModel(
@@ -680,7 +681,8 @@ private[python] class PythonMLLibAPI extends Serializable {
numPartitions: Int,
numIterations: Int,
seed: Long,
- minCount: Int): Word2VecModelWrapper = {
+ minCount: Int,
+ windowSize: Int): Word2VecModelWrapper = {
val word2vec = new Word2Vec()
.setVectorSize(vectorSize)
.setLearningRate(learningRate)
@@ -688,6 +690,7 @@ private[python] class PythonMLLibAPI extends Serializable {
.setNumIterations(numIterations)
.setSeed(seed)
.setMinCount(minCount)
+ .setWindowSize(windowSize)
try {
val model = word2vec.fit(dataJRDD.rdd.persist(StorageLevel.MEMORY_AND_DISK_SER))
new Word2VecModelWrapper(model)