diff options
author | Jason Lee <cjlee@us.ibm.com> | 2016-04-18 12:47:14 -0700 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2016-04-18 12:47:14 -0700 |
commit | 3d66a2ce9bfc19096e07181f9e970372d32bbc0b (patch) | |
tree | d2e5205d84bd63a764801ff106f098897e507c41 /python/pyspark/ml/feature.py | |
parent | d280d1da1aec925687a0bfb496f3a6e0979e896f (diff) | |
download | spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.gz spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.bz2 spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.zip |
[SPARK-14564][ML][MLLIB][PYSPARK] Python Word2Vec missing setWindowSize method
## What changes were proposed in this pull request?
Added the `windowSize` param, with its getter/setter, to the Python Word2Vec API in ML/MLlib
## How was this patch tested?
Added test cases in tests.py under both ML and MLlib
Author: Jason Lee <cjlee@us.ibm.com>
Closes #12428 from jasoncl/SPARK-14564.
Diffstat (limited to 'python/pyspark/ml/feature.py')
-rw-r--r-- | python/pyspark/ml/feature.py | 28 |
1 file changed, 23 insertions, 5 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 776906eaab..49a78ede37 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -2219,28 +2219,31 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has minCount = Param(Params._dummy(), "minCount", "the minimum number of times a token must appear to be included in the " + "word2vec model's vocabulary", typeConverter=TypeConverters.toInt) + windowSize = Param(Params._dummy(), "windowSize", + "the window size (context words from [-window, window]). Default value is 5", + typeConverter=TypeConverters.toInt) @keyword_only def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, - seed=None, inputCol=None, outputCol=None): + seed=None, inputCol=None, outputCol=None, windowSize=5): """ __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \ - seed=None, inputCol=None, outputCol=None) + seed=None, inputCol=None, outputCol=None, windowSize=5) """ super(Word2Vec, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid) self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, - seed=None) + seed=None, windowSize=5) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only @since("1.4.0") def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, - seed=None, inputCol=None, outputCol=None): + seed=None, inputCol=None, outputCol=None, windowSize=5): """ setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, \ - inputCol=None, outputCol=None) + inputCol=None, outputCol=None, windowSize=5) Sets params for this Word2Vec. 
""" kwargs = self.setParams._input_kwargs @@ -2291,6 +2294,21 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has """ return self.getOrDefault(self.minCount) + @since("2.0.0") + def setWindowSize(self, value): + """ + Sets the value of :py:attr:`windowSize`. + """ + self._set(windowSize=value) + return self + + @since("2.0.0") + def getWindowSize(self): + """ + Gets the value of windowSize or its default value. + """ + return self.getOrDefault(self.windowSize) + def _create_model(self, java_model): return Word2VecModel(java_model) |