about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml
diff options
context:
space:
mode:
author: WeichenXu <WeichenXu123@outlook.com> 2016-06-10 12:26:53 +0100
committer: Sean Owen <sowen@cloudera.com> 2016-06-10 12:26:53 +0100
commit: cdd7f5a57a21d4a8f93456d149f65859c96190cf (patch)
tree: b72d79281ab8713f0a9f8b37d59815a06a5c1837 /python/pyspark/ml
parent: 16ca32eace39c423224b0ec25922038fd45c501a (diff)
download: spark-cdd7f5a57a21d4a8f93456d149f65859c96190cf.tar.gz
spark-cdd7f5a57a21d4a8f93456d149f65859c96190cf.tar.bz2
spark-cdd7f5a57a21d4a8f93456d149f65859c96190cf.zip
[SPARK-15837][ML][PYSPARK] Word2vec python add maxsentence parameter
## What changes were proposed in this pull request?
Word2vec python add maxsentence parameter.
## How was this patch tested?
Existing test.
Author: WeichenXu <WeichenXu123@outlook.com>
Closes #13578 from WeichenXu123/word2vec_python_add_maxsentence.
Diffstat (limited to 'python/pyspark/ml')
-rwxr-xr-x python/pyspark/ml/feature.py 29
1 file changed, 24 insertions, 5 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index ebe13006ad..bfb2fb7071 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -2244,28 +2244,33 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
windowSize = Param(Params._dummy(), "windowSize",
"the window size (context words from [-window, window]). Default value is 5",
typeConverter=TypeConverters.toInt)
+ maxSentenceLength = Param(Params._dummy(), "maxSentenceLength",
+ "Maximum length (in words) of each sentence in the input data. " +
+ "Any sentence longer than this threshold will " +
+ "be divided into chunks up to the size.",
+ typeConverter=TypeConverters.toInt)
@keyword_only
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=None, inputCol=None, outputCol=None, windowSize=5):
+ seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000):
"""
__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
- seed=None, inputCol=None, outputCol=None, windowSize=5)
+ seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000)
"""
super(Word2Vec, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=None, windowSize=5)
+ seed=None, windowSize=5, maxSentenceLength=1000)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@keyword_only
@since("1.4.0")
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=None, inputCol=None, outputCol=None, windowSize=5):
+ seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000):
"""
setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, \
- inputCol=None, outputCol=None, windowSize=5)
+ inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000)
Sets params for this Word2Vec.
"""
kwargs = self.setParams._input_kwargs
@@ -2327,6 +2332,20 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
"""
return self.getOrDefault(self.windowSize)
+ @since("2.0.0")
+ def setMaxSentenceLength(self, value):
+ """
+ Sets the value of :py:attr:`maxSentenceLength`.
+ """
+ return self._set(maxSentenceLength=value)
+
+ @since("2.0.0")
+ def getMaxSentenceLength(self):
+ """
+ Gets the value of maxSentenceLength or its default value.
+ """
+ return self.getOrDefault(self.maxSentenceLength)
+
def _create_model(self, java_model):
return Word2VecModel(java_model)