aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/classification.py
diff options
context:
space:
mode:
authorXusen Yin <yinxusen@gmail.com>2016-05-01 12:29:01 -0700
committerJoseph K. Bradley <joseph@databricks.com>2016-05-01 12:29:01 -0700
commita6428292f78fd594f41a4a7bf254d40268f46305 (patch)
tree4abbc07b299f0b05e563e21bcfdcc42afdfc4b2b /python/pyspark/ml/classification.py
parentcdf9e9753df4e7f2fa4e972d1bfded4e22943c27 (diff)
downloadspark-a6428292f78fd594f41a4a7bf254d40268f46305.tar.gz
spark-a6428292f78fd594f41a4a7bf254d40268f46305.tar.bz2
spark-a6428292f78fd594f41a4a7bf254d40268f46305.zip
[SPARK-14931][ML][PYTHON] Mismatched default values between pipelines in Spark and PySpark - update
## What changes were proposed in this pull request?

This PR is an update for [https://github.com/apache/spark/pull/12738] which:

* Adds a generic unit test for JavaParams wrappers in pyspark.ml for checking default Param values vs. the defaults in the Scala side
* Various fixes for bugs found
* This includes changing classes taking weightCol to treat unset and empty String Param values the same way.

Defaults changed:

* Scala
  * LogisticRegression: weightCol defaults to not set (instead of empty string)
  * StringIndexer: labels default to not set (instead of empty array)
  * GeneralizedLinearRegression:
    * maxIter always defaults to 25 (simpler than defaulting to 25 for a particular solver)
    * weightCol defaults to not set (instead of empty string)
  * LinearRegression: weightCol defaults to not set (instead of empty string)
* Python
  * MultilayerPerceptron: layers default to not set (instead of [1,1])
  * ChiSqSelector: numTopFeatures defaults to 50 (instead of not set)

## How was this patch tested?

Generic unit test. Manually tested that unit test by changing defaults and verifying that broke the test.

Author: Joseph K. Bradley <joseph@databricks.com>
Author: yinxusen <yinxusen@gmail.com>

Closes #12816 from jkbradley/yinxusen-SPARK-14931.
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r--python/pyspark/ml/classification.py13
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index f616c7fbec..4331f73b73 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -1056,7 +1056,7 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
layers = Param(Params._dummy(), "layers", "Sizes of layers from input layer to output layer " +
"E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 " +
- "neurons and output layer of 10 neurons, default is [1, 1].",
+ "neurons and output layer of 10 neurons.",
typeConverter=TypeConverters.toListInt)
blockSize = Param(Params._dummy(), "blockSize", "Block size for stacking input data in " +
"matrices. Data is stacked within partitions. If block size is more than " +
@@ -1069,12 +1069,12 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128)
+ maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
"""
super(MultilayerPerceptronClassifier, self).__init__()
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
- self._setDefault(maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128)
+ self._setDefault(maxIter=100, tol=1E-4, blockSize=128)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@@ -1084,14 +1084,11 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128)
+ maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
Sets params for MultilayerPerceptronClassifier.
"""
kwargs = self.setParams._input_kwargs
- if layers is None:
- return self._set(**kwargs).setLayers([1, 1])
- else:
- return self._set(**kwargs)
+ return self._set(**kwargs)
def _create_model(self, java_model):
return MultilayerPerceptronClassificationModel(java_model)