aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/feature.py
diff options
context:
space:
mode:
author: Jason Lee <cjlee@us.ibm.com> 2016-04-18 12:47:14 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2016-04-18 12:47:14 -0700
commit: 3d66a2ce9bfc19096e07181f9e970372d32bbc0b (patch)
tree: d2e5205d84bd63a764801ff106f098897e507c41 /python/pyspark/ml/feature.py
parent: d280d1da1aec925687a0bfb496f3a6e0979e896f (diff)
download: spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.gz
spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.tar.bz2
spark-3d66a2ce9bfc19096e07181f9e970372d32bbc0b.zip
[SPARK-14564][ML][MLLIB][PYSPARK] Python Word2Vec missing setWindowSize method
## What changes were proposed in this pull request?

Added windowSize getter/setter to ML/MLlib.

## How was this patch tested?

Added test cases in tests.py under both ML and MLlib.

Author: Jason Lee <cjlee@us.ibm.com>

Closes #12428 from jasoncl/SPARK-14564.
Diffstat (limited to 'python/pyspark/ml/feature.py')
-rw-r--r-- python/pyspark/ml/feature.py 28
1 files changed, 23 insertions, 5 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 776906eaab..49a78ede37 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -2219,28 +2219,31 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
minCount = Param(Params._dummy(), "minCount",
"the minimum number of times a token must appear to be included in the " +
"word2vec model's vocabulary", typeConverter=TypeConverters.toInt)
+ windowSize = Param(Params._dummy(), "windowSize",
+ "the window size (context words from [-window, window]). Default value is 5",
+ typeConverter=TypeConverters.toInt)
@keyword_only
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=None, inputCol=None, outputCol=None):
+ seed=None, inputCol=None, outputCol=None, windowSize=5):
"""
__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
- seed=None, inputCol=None, outputCol=None)
+ seed=None, inputCol=None, outputCol=None, windowSize=5)
"""
super(Word2Vec, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=None)
+ seed=None, windowSize=5)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@keyword_only
@since("1.4.0")
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=None, inputCol=None, outputCol=None):
+ seed=None, inputCol=None, outputCol=None, windowSize=5):
"""
setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, \
- inputCol=None, outputCol=None)
+ inputCol=None, outputCol=None, windowSize=5)
Sets params for this Word2Vec.
"""
kwargs = self.setParams._input_kwargs
@@ -2291,6 +2294,21 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
"""
return self.getOrDefault(self.minCount)
+ @since("2.0.0")
+ def setWindowSize(self, value):
+ """
+ Sets the value of :py:attr:`windowSize`.
+ """
+ self._set(windowSize=value)
+ return self
+
+ @since("2.0.0")
+ def getWindowSize(self):
+ """
+ Gets the value of windowSize or its default value.
+ """
+ return self.getOrDefault(self.windowSize)
+
def _create_model(self, java_model):
return Word2VecModel(java_model)