diff options
author | Jason Lee <cjlee@us.ibm.com> | 2016-04-18 12:47:14 -0700 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2016-04-18 12:47:14 -0700 |
commit | 3d66a2ce9bfc19096e07181f9e970372d32bbc0b (patch) | |
tree | d2e5205d84bd63a764801ff106f098897e507c41 /mllib/src/main | |
parent | d280d1da1aec925687a0bfb496f3a6e0979e896f (diff) | |
download | spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.gz spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.bz2 spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.zip |
[SPARK-14564][ML][MLLIB][PYSPARK] Python Word2Vec missing setWindowSize method
## What changes were proposed in this pull request?
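Added a `windowSize` getter/setter to the Python Word2Vec APIs in ML and MLlib, and passed `windowSize` through `PythonMLLibAPI.trainWord2VecModel` on the Scala side.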
## How was this patch tested?
Added test cases in tests.py under both ML and MLlib
Author: Jason Lee <cjlee@us.ibm.com>
Closes #12428 from jasoncl/SPARK-14564.
Diffstat (limited to 'mllib/src/main')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 1a58779055..32dc16de08 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -671,6 +671,7 @@ private[python] class PythonMLLibAPI extends Serializable {
    * @param numPartitions number of partitions
    * @param numIterations number of iterations
    * @param seed initial seed for random generator
+   * @param windowSize size of window
    * @return A handle to java Word2VecModelWrapper instance at python side
    */
   def trainWord2VecModel(
@@ -680,7 +681,8 @@ private[python] class PythonMLLibAPI extends Serializable {
       numPartitions: Int,
       numIterations: Int,
       seed: Long,
-      minCount: Int): Word2VecModelWrapper = {
+      minCount: Int,
+      windowSize: Int): Word2VecModelWrapper = {
     val word2vec = new Word2Vec()
       .setVectorSize(vectorSize)
       .setLearningRate(learningRate)
@@ -688,6 +690,7 @@ private[python] class PythonMLLibAPI extends Serializable {
       .setNumIterations(numIterations)
       .setSeed(seed)
       .setMinCount(minCount)
+      .setWindowSize(windowSize)
     try {
       val model = word2vec.fit(dataJRDD.rdd.persist(StorageLevel.MEMORY_AND_DISK_SER))
       new Word2VecModelWrapper(model)
```