author    Bryan Cutler <cutlerb@gmail.com>    2016-06-21 11:43:25 -0700
committer Xiangrui Meng <meng@databricks.com>    2016-06-21 11:43:25 -0700
commit    b76e3553760b3c68bebc2c71b0851598718e6f87 (patch)
tree      fabfaeac83c6eb6578a3f3e67085df2424261915
parent    57746295e6fb705f8393a00ab1cc570ddb7da44e (diff)
[SPARK-15741][PYSPARK][ML] Pyspark cleanup of set default seed to None
## What changes were proposed in this pull request?

Several places set the seed Param's default value to None, which translates to a zero value on the Scala side. This is unnecessary because a fixed default value already exists; if a test depends on a zero-valued seed, it should set the seed to zero explicitly instead of relying on this translation. These cases can be safely removed, except for the ALS doctest, which has been changed to set the seed value to zero.

## How was this patch tested?

Ran PySpark tests locally.

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #13672 from BryanCutler/pyspark-cleanup-setDefault-seed-SPARK-15741.
-rw-r--r--  python/pyspark/ml/classification.py  |  4 ++--
-rwxr-xr-x  python/pyspark/ml/feature.py         |  2 +-
-rw-r--r--  python/pyspark/ml/recommendation.py  |  4 ++--
-rw-r--r--  python/pyspark/ml/regression.py      |  4 ++--
4 files changed, 7 insertions(+), 7 deletions(-)
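The one behavioral caller-visible change is in the ALS doctest below: a test that previously relied on the implicit None-to-zero translation must now pin the seed itself. A minimal sketch of that idiom, using the same ratings data as the doctest (assumes a 2.0-era PySpark and an active SparkSession):

```python
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS

spark = SparkSession.builder.getOrCreate()

# Ratings data as in the ALS doctest: (user, item, rating) triples.
df = spark.createDataFrame(
    [(0, 0, 4.0), (0, 1, 2.0), (1, 1, 3.0), (1, 2, 4.0), (2, 1, 1.0), (2, 2, 5.0)],
    ["user", "item", "rating"])

# Before this patch, leaving `seed` unset let the Python-side default of None
# reach the JVM, where it deserialized as 0. After the patch, an unset seed
# falls back to the Param's own fixed default, so a test that depends on a
# zero seed must say so explicitly:
als = ALS(rank=10, maxIter=5, seed=0)
model = als.fit(df)
```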
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index a3cd91790c..e86c27ecaf 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -685,7 +685,7 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.RandomForestClassifier", self.uid)
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None,
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
impurity="gini", numTrees=20, featureSubsetStrategy="auto")
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@@ -825,7 +825,7 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
"org.apache.spark.ml.classification.GBTClassifier", self.uid)
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
- lossType="logistic", maxIter=20, stepSize=0.1, seed=None)
+ lossType="logistic", maxIter=20, stepSize=0.1)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 1e9ec0fbb4..bbbb94f9a0 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -2260,7 +2260,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
super(Word2Vec, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
- seed=None, windowSize=5, maxSentenceLength=1000)
+ windowSize=5, maxSentenceLength=1000)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index 0a7096794d..e28d38bd19 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -68,7 +68,7 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
>>> df = spark.createDataFrame(
... [(0, 0, 4.0), (0, 1, 2.0), (1, 1, 3.0), (1, 2, 4.0), (2, 1, 1.0), (2, 2, 5.0)],
... ["user", "item", "rating"])
- >>> als = ALS(rank=10, maxIter=5)
+ >>> als = ALS(rank=10, maxIter=5, seed=0)
>>> model = als.fit(df)
>>> model.rank
10
@@ -142,7 +142,7 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
super(ALS, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.recommendation.ALS", self.uid)
self._setDefault(rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
- implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None,
+ implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item",
ratingCol="rating", nonnegative=False, checkpointInterval=10,
intermediateStorageLevel="MEMORY_AND_DISK",
finalStorageLevel="MEMORY_AND_DISK")
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 8d2378d51f..29efd6a852 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -894,7 +894,7 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
"org.apache.spark.ml.regression.RandomForestRegressor", self.uid)
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
- impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20,
+ impurity="variance", subsamplingRate=1.0, numTrees=20,
featureSubsetStrategy="auto")
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@@ -1023,7 +1023,7 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1,
- seed=None, impurity="variance")
+ impurity="variance")
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
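For the estimators above, removing `seed=None` from `_setDefault` means the seed default is no longer shadowed; the Param's existing fixed default applies instead. A rough sketch of what that looks like from the caller's side (assuming the 2.0-era `HasSeed` mixin, which supplies a hash-based fixed default, and an active SparkSession so the JVM-backed estimator can be constructed):

```python
from pyspark.sql import SparkSession
from pyspark.ml.classification import RandomForestClassifier

spark = SparkSession.builder.getOrCreate()  # a JVM is needed to build the estimator

rf = RandomForestClassifier()

# With `seed=None` gone from _setDefault, getOrDefault resolves to the
# HasSeed mixin's fixed default rather than None (which the Scala side
# would have read as 0).
print(rf.isDefined(rf.seed))       # True: a default seed still exists
print(rf.getOrDefault(rf.seed))    # the mixin's fixed default, not None

# Callers who need a specific seed set it explicitly, exactly as before:
rf_fixed = RandomForestClassifier(seed=42)
print(rf_fixed.getOrDefault(rf_fixed.seed))  # 42
```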