about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/feature.py
diff options
context:
space:
mode:
author: Holden Karau <holden@pigscanfly.ca> 2015-05-20 15:16:12 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2015-05-20 15:16:12 -0700
commit: 191ee474527530246ac3164ae9631e01bdd1e647 (patch)
tree: 24a8b2d7991f39f478b05f2cb3a7b14c539a9eb9 /python/pyspark/ml/feature.py
parent: 6338c40da61de045485c51aa11a5b1e425d22144 (diff)
download: spark-191ee474527530246ac3164ae9631e01bdd1e647.tar.gz
spark-191ee474527530246ac3164ae9631e01bdd1e647.tar.bz2
spark-191ee474527530246ac3164ae9631e01bdd1e647.zip
[SPARK-7511] [MLLIB] pyspark ml seed param should be random by default or 42 is quite funny but not very random
Author: Holden Karau <holden@pigscanfly.ca>

Closes #6139 from holdenk/SPARK-7511-pyspark-ml-seed-param-should-be-random-by-default-or-42-is-quite-funny-but-not-very-random and squashes the following commits:

591f8e5 [Holden Karau] specify old seed for doc tests
2470004 [Holden Karau] Fix a bunch of seeds with default values to have None as the default which will then result in using the hash of the class name
cbad96d [Holden Karau] Add the setParams function that is used in the real code
423b8d7 [Holden Karau] Switch the test code to behave slightly more like production code. also don't check the param map value only check for key existence
140d25d [Holden Karau] remove extra space
926165a [Holden Karau] Add some missing newlines for pep8 style
8616751 [Holden Karau] merge in master
58532e6 [Holden Karau] its the __name__ method, also treat None values as not set
56ef24a [Holden Karau] fix test and regenerate base
afdaa5c [Holden Karau] make sure different classes have different results
68eb528 [Holden Karau] switch default seed to hash of type of self
89c4611 [Holden Karau] Merge branch 'master' into SPARK-7511-pyspark-ml-seed-param-should-be-random-by-default-or-42-is-quite-funny-but-not-very-random
31cd96f [Holden Karau] specify the seed to randomforestregressor test
e1b947f [Holden Karau] Style fixes
ce90ec8 [Holden Karau] merge in master
bcdf3c9 [Holden Karau] update docstring seeds to none and some other default seeds from 42
65eba21 [Holden Karau] pep8 fixes
0e3797e [Holden Karau] Make seed default to random in more places
213a543 [Holden Karau] Simplify the generated code to only include set default if there is a default rather than having None is note None in the generated code
1ff17c2 [Holden Karau] Make the seed random for HasSeed in python
Diffstat (limited to 'python/pyspark/ml/feature.py')
-rw-r--r-- python/pyspark/ml/feature.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index c8115cb5bc..5511dceb70 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -876,10 +876,10 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
@keyword_only
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=42, inputCol=None, outputCol=None):
+ seed=None, inputCol=None, outputCol=None):
"""
__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
- seed=42, inputCol=None, outputCol=None)
+ seed=None, inputCol=None, outputCol=None)
"""
super(Word2Vec, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
@@ -891,15 +891,15 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
"the minimum number of times a token must appear to be included " +
"in the word2vec model's vocabulary")
self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=42)
+ seed=None)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@keyword_only
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=42, inputCol=None, outputCol=None):
+ seed=None, inputCol=None, outputCol=None):
"""
- setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, \
+ setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, \
inputCol=None, outputCol=None)
Sets params for this Word2Vec.
"""