From cdeb97a8cd26e3282cc2a4f126242ed2199f3898 Mon Sep 17 00:00:00 2001
From: Sean Owen <sowen@cloudera.com>
Date: Sun, 4 Sep 2016 12:40:51 +0100
Subject: [SPARK-17311][MLLIB] Standardize Python-Java MLlib API to accept
 optional long seeds in all cases

## What changes were proposed in this pull request?

Related to https://github.com/apache/spark/pull/14524 -- just the 'fix' rather than a behavior change.

- PythonMLlibAPI methods that take a seed now always take a `java.lang.Long` consistently, allowing the Python API to specify "no seed"
- .mllib's Word2VecModel seemed to be an odd man out in .mllib in that it picked its own random seed. Instead it defaults to None, meaning, letting the Scala implementation pick a seed
- BisectingKMeansModel arguably should not hard-code a seed for consistency with .mllib, I think. However I left it.

## How was this patch tested?

Existing tests

Author: Sean Owen <sowen@cloudera.com>

Closes #14826 from srowen/SPARK-16832.2.
---
 python/pyspark/mllib/feature.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'python/pyspark')

diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 324ba9758e..b32d0c70ec 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -600,7 +600,7 @@ class Word2Vec(object):
         self.learningRate = 0.025
         self.numPartitions = 1
         self.numIterations = 1
-        self.seed = random.randint(0, sys.maxsize)
+        self.seed = None
         self.minCount = 5
         self.windowSize = 5
 
@@ -675,7 +675,7 @@ class Word2Vec(object):
             raise TypeError("data should be an RDD of list of string")
         jmodel = callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize),
                                float(self.learningRate), int(self.numPartitions),
-                               int(self.numIterations), int(self.seed),
+                               int(self.numIterations), self.seed,
                                int(self.minCount), int(self.windowSize))
         return Word2VecModel(jmodel)
 
-- 
cgit v1.2.3