From 3d66a2ce9bfc19096e07181f9e970372d32bbc0b Mon Sep 17 00:00:00 2001
From: Jason Lee
Date: Mon, 18 Apr 2016 12:47:14 -0700
Subject: [SPARK-14564][ML][MLLIB][PYSPARK] Python Word2Vec missing setWindowSize method

## What changes were proposed in this pull request?

Added windowSize getter/setter to ML/MLlib

## How was this patch tested?

Added test cases in tests.py under both ML and MLlib

Author: Jason Lee

Closes #12428 from jasoncl/SPARK-14564.
---
 python/pyspark/mllib/feature.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'python/pyspark/mllib/feature.py')

diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index b3dd2f63a5..90559f6cfb 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -617,6 +617,7 @@ class Word2Vec(object):
         self.numIterations = 1
         self.seed = random.randint(0, sys.maxsize)
         self.minCount = 5
+        self.windowSize = 5
 
     @since('1.2.0')
     def setVectorSize(self, vectorSize):
@@ -669,6 +670,14 @@ class Word2Vec(object):
         self.minCount = minCount
         return self
 
+    @since('2.0.0')
+    def setWindowSize(self, windowSize):
+        """
+        Sets window size (default: 5).
+        """
+        self.windowSize = windowSize
+        return self
+
     @since('1.2.0')
     def fit(self, data):
         """
@@ -682,7 +691,7 @@ class Word2Vec(object):
         jmodel = callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize),
                                float(self.learningRate), int(self.numPartitions),
                                int(self.numIterations), int(self.seed),
-                               int(self.minCount))
+                               int(self.minCount), int(self.windowSize))
         return Word2VecModel(jmodel)
 
 
--
cgit v1.2.3