From 67cc89ff028324ba4a7a7d9c19a268b9afea0031 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 3 Jun 2016 15:56:17 -0700 Subject: [SPARK-15168][PYSPARK][ML] Add missing params to MultilayerPerceptronClassifier ## What changes were proposed in this pull request? MultilayerPerceptronClassifier is missing step size, solver, and weights. Add these params. Also clarify the scaladoc a bit while we are updating these params. Eventually we should follow up and unify the HasSolver params (filed https://issues.apache.org/jira/browse/SPARK-15169 ) ## How was this patch tested? Doc tests Author: Holden Karau Closes #12943 from holdenk/SPARK-15168-add-missing-params-to-MultilayerPerceptronClassifier. --- python/pyspark/ml/classification.py | 75 ++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 9 deletions(-) (limited to 'python') diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 177cf9d72c..7710fdf2e2 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1041,7 +1041,8 @@ class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable): @inherit_doc class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, - HasMaxIter, HasTol, HasSeed, JavaMLWritable, JavaMLReadable): + HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, + JavaMLReadable): """ Classifier trainer based on the Multilayer Perceptron. Each layer has sigmoid activation function, output layer has softmax. @@ -1054,12 +1055,12 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, ... (1.0, Vectors.dense([0.0, 1.0])), ... (1.0, Vectors.dense([1.0, 0.0])), ... (0.0, Vectors.dense([1.0, 1.0]))], ["label", "features"]) - >>> mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[2, 5, 2], blockSize=1, seed=123) + >>> mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[2, 2, 2], blockSize=1, seed=123) >>> model = mlp.fit(df) >>> model.layers - [2, 5, 2] + [2, 2, 2] >>> model.weights.size - 27 + 12 >>> testDF = spark.createDataFrame([ ... (Vectors.dense([1.0, 0.0]),), ... (Vectors.dense([0.0, 0.0]),)], ["features"]) @@ -1083,6 +1084,12 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, True >>> model.weights == model2.weights True + >>> mlp2 = mlp2.setInitialWeights(list(range(0, 12))) + >>> model3 = mlp2.fit(df) + >>> model3.weights != model2.weights + True + >>> model3.layers == model.layers + True .. versionadded:: 1.6.0 """ @@ -1096,28 +1103,36 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, "remaining data in a partition then it is adjusted to the size of this " + "data. Recommended size is between 10 and 1000, default is 128.", typeConverter=TypeConverters.toInt) + solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + + "options: l-bfgs, gd.", typeConverter=TypeConverters.toString) + initialWeights = Param(Params._dummy(), "initialWeights", "The initial weights of the model.", + typeConverter=TypeConverters.toVector) @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128): + maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, + solver="l-bfgs", initialWeights=None): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128) + maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \ + solver="l-bfgs", initialWeights=None) """ super(MultilayerPerceptronClassifier, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid) - self._setDefault(maxIter=100, tol=1E-4, blockSize=128) + self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, solver="l-bfgs") kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only @since("1.6.0") def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128): + maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, + solver="l-bfgs", initialWeights=None): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128) + maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128, stepSize=0.03, \ + solver="l-bfgs", initialWeights=None) Sets params for MultilayerPerceptronClassifier. """ kwargs = self.setParams._input_kwargs @@ -1154,6 +1169,48 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, """ return self.getOrDefault(self.blockSize) + @since("2.0.0") + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + return self._set(stepSize=value) + + @since("2.0.0") + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) + + @since("2.0.0") + def setSolver(self, value): + """ + Sets the value of :py:attr:`solver`. + """ + return self._set(solver=value) + + @since("2.0.0") + def getSolver(self): + """ + Gets the value of solver or its default value. + """ + return self.getOrDefault(self.solver) + + @since("2.0.0") + def setInitialWeights(self, value): + """ + Sets the value of :py:attr:`initialWeights`. + """ + return self._set(initialWeights=value) + + @since("2.0.0") + def getInitialWeights(self): + """ + Gets the value of initialWeights or its default value. + """ + return self.getOrDefault(self.initialWeights) + class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable): """ -- cgit v1.2.3