aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main/scala
diff options
context:
space:
mode:
author Yanbo Liang <ybliang8@gmail.com> 2016-05-19 10:27:17 -0700
committer Joseph K. Bradley <joseph@databricks.com> 2016-05-19 10:27:17 -0700
commit 8ecf7f77b2be0a178a8d94d60477876d4ab7517a (patch)
tree 8a94965b565f6f6b52e8901f1da1a145da24d10c /mllib/src/main/scala
parent 1052d3644d7eb0e784eb883293ce63a352a3b123 (diff)
download spark-8ecf7f77b2be0a178a8d94d60477876d4ab7517a.tar.gz
spark-8ecf7f77b2be0a178a8d94d60477876d4ab7517a.tar.bz2
spark-8ecf7f77b2be0a178a8d94d60477876d4ab7517a.zip
[SPARK-15292][ML] ML 2.0 QA: Scala APIs audit for classification
## What changes were proposed in this pull request? Audit the Scala API for classification; almost all issues were related to ```MultilayerPerceptronClassifier``` in this section. * Fix one wrong param getter function: ```getOptimizer``` -> ```getSolver``` * Add missing setter functions for ```solver``` and ```stepSize```. * Make ```GD``` solver take effect. * Update docs, annotations and fix other minor issues. ## How was this patch tested? Existing unit tests. Author: Yanbo Liang <ybliang8@gmail.com> Closes #13076 from yanboliang/spark-15292.
Diffstat (limited to 'mllib/src/main/scala')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala 121
1 files changed, 88 insertions, 33 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
index 683ae4aaf4..c4e882240f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
@@ -32,22 +32,22 @@ import org.apache.spark.ml.util._
import org.apache.spark.sql.Dataset
/** Params for Multilayer Perceptron. */
-private[ml] trait MultilayerPerceptronParams extends PredictorParams
+private[classification] trait MultilayerPerceptronParams extends PredictorParams
with HasSeed with HasMaxIter with HasTol with HasStepSize {
/**
* Layer sizes including input size and output size.
- * Default: Array(1, 1)
*
* @group param
*/
+ @Since("1.5.0")
final val layers: IntArrayParam = new IntArrayParam(this, "layers",
- "Sizes of layers from input layer to output layer" +
- " E.g., Array(780, 100, 10) means 780 inputs, " +
+ "Sizes of layers from input layer to output layer. " +
+ "E.g., Array(780, 100, 10) means 780 inputs, " +
"one hidden layer with 100 neurons and output layer of 10 neurons.",
- (t: Array[Int]) => t.forall(ParamValidators.gt(0)) && t.length > 1
- )
+ (t: Array[Int]) => t.forall(ParamValidators.gt(0)) && t.length > 1)
/** @group getParam */
+ @Since("1.5.0")
final def getLayers: Array[Int] = $(layers)
/**
@@ -59,42 +59,49 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
*
* @group expertParam
*/
+ @Since("1.5.0")
final val blockSize: IntParam = new IntParam(this, "blockSize",
"Block size for stacking input data in matrices. Data is stacked within partitions." +
" If block size is more than remaining data in a partition then " +
"it is adjusted to the size of this data. Recommended size is between 10 and 1000",
ParamValidators.gt(0))
- /** @group getParam */
+ /** @group expertGetParam */
+ @Since("1.5.0")
final def getBlockSize: Int = $(blockSize)
/**
- * Allows setting the solver: minibatch gradient descent (gd) or l-bfgs.
- * l-bfgs is the default one.
+ * The solver algorithm for optimization.
+ * Supported options: "gd" (minibatch gradient descent) or "l-bfgs".
+ * Default: "l-bfgs"
*
* @group expertParam
*/
+ @Since("2.0.0")
final val solver: Param[String] = new Param[String](this, "solver",
- " Allows setting the solver: minibatch gradient descent (gd) or l-bfgs. " +
- " l-bfgs is the default one.",
- ParamValidators.inArray[String](Array("gd", "l-bfgs")))
+ "The solver algorithm for optimization. Supported options: " +
+ s"${MultilayerPerceptronClassifier.supportedSolvers.mkString(", ")}. (Default l-bfgs)",
+ ParamValidators.inArray[String](MultilayerPerceptronClassifier.supportedSolvers))
- /** @group getParam */
- final def getOptimizer: String = $(solver)
+ /** @group expertGetParam */
+ @Since("2.0.0")
+ final def getSolver: String = $(solver)
/**
- * Model weights. Can be returned either after training or after explicit setting
+ * The initial weights of the model.
*
* @group expertParam
*/
- final val weights: Param[Vector] = new Param[Vector](this, "weights",
- " Sets the weights of the model ")
-
- /** @group getParam */
- final def getWeights: Vector = $(weights)
+ @Since("2.0.0")
+ final val initialWeights: Param[Vector] = new Param[Vector](this, "initialWeights",
+ "The initial weights of the model")
+ /** @group expertGetParam */
+ @Since("2.0.0")
+ final def getInitialWeights: Vector = $(initialWeights)
- setDefault(maxIter -> 100, tol -> 1e-4, blockSize -> 128, solver -> "l-bfgs", stepSize -> 0.03)
+ setDefault(maxIter -> 100, tol -> 1e-4, blockSize -> 128,
+ solver -> MultilayerPerceptronClassifier.LBFGS, stepSize -> 0.03)
}
/** Label to vector converter. */
@@ -145,15 +152,33 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
@Since("1.5.0")
def this() = this(Identifiable.randomUID("mlpc"))
- /** @group setParam */
+ /**
+ * Sets the value of param [[layers]].
+ *
+ * @group setParam
+ */
@Since("1.5.0")
def setLayers(value: Array[Int]): this.type = set(layers, value)
- /** @group setParam */
+ /**
+ * Sets the value of param [[blockSize]].
+ * Default is 128.
+ *
+ * @group expertSetParam
+ */
@Since("1.5.0")
def setBlockSize(value: Int): this.type = set(blockSize, value)
/**
+ * Sets the value of param [[solver]].
+ * Default is "l-bfgs".
+ *
+ * @group expertSetParam
+ */
+ @Since("2.0.0")
+ def setSolver(value: String): this.type = set(solver, value)
+
+ /**
* Set the maximum number of iterations.
* Default is 100.
*
@@ -181,12 +206,21 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
def setSeed(value: Long): this.type = set(seed, value)
/**
- * Sets the model weights.
+ * Sets the value of param [[initialWeights]].
*
- * @group expertParam
+ * @group expertSetParam
+ */
+ @Since("2.0.0")
+ def setInitialWeights(value: Vector): this.type = set(initialWeights, value)
+
+ /**
+ * Sets the value of param [[stepSize]] (applicable only for solver "gd").
+ * Default is 0.03.
+ *
+ * @group setParam
*/
@Since("2.0.0")
- def setWeights(value: Vector): this.type = set(weights, value)
+ def setStepSize(value: Double): this.type = set(stepSize, value)
@Since("1.5.0")
override def copy(extra: ParamMap): MultilayerPerceptronClassifier = defaultCopy(extra)
@@ -204,16 +238,26 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
val labels = myLayers.last
val lpData = extractLabeledPoints(dataset)
val data = lpData.map(lp => LabelConverter.encodeLabeledPoint(lp, labels))
- val topology = FeedForwardTopology.multiLayerPerceptron(myLayers, true)
+ val topology = FeedForwardTopology.multiLayerPerceptron(myLayers, softmaxOnTop = true)
val trainer = new FeedForwardTrainer(topology, myLayers(0), myLayers.last)
- if (isDefined(weights)) {
- trainer.setWeights($(weights))
+ if (isDefined(initialWeights)) {
+ trainer.setWeights($(initialWeights))
} else {
trainer.setSeed($(seed))
}
- trainer.LBFGSOptimizer
- .setConvergenceTol($(tol))
- .setNumIterations($(maxIter))
+ if ($(solver) == MultilayerPerceptronClassifier.LBFGS) {
+ trainer.LBFGSOptimizer
+ .setConvergenceTol($(tol))
+ .setNumIterations($(maxIter))
+ } else if ($(solver) == MultilayerPerceptronClassifier.GD) {
+ trainer.SGDOptimizer
+ .setNumIterations($(maxIter))
+ .setConvergenceTol($(tol))
+ .setStepSize($(stepSize))
+ } else {
+ throw new IllegalArgumentException(
+ s"The solver $solver is not supported by MultilayerPerceptronClassifier.")
+ }
trainer.setStackSize($(blockSize))
val mlpModel = trainer.train(data)
new MultilayerPerceptronClassificationModel(uid, myLayers, mlpModel.weights)
@@ -224,6 +268,15 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
object MultilayerPerceptronClassifier
extends DefaultParamsReadable[MultilayerPerceptronClassifier] {
+ /** String name for "l-bfgs" solver. */
+ private[classification] val LBFGS = "l-bfgs"
+
+ /** String name for "gd" (minibatch gradient descent) solver. */
+ private[classification] val GD = "gd"
+
+ /** Set of solvers that MultilayerPerceptronClassifier supports. */
+ private[classification] val supportedSolvers = Array(LBFGS, GD)
+
@Since("2.0.0")
override def load(path: String): MultilayerPerceptronClassifier = super.load(path)
}
@@ -250,7 +303,9 @@ class MultilayerPerceptronClassificationModel private[ml] (
@Since("1.6.0")
override val numFeatures: Int = layers.head
- private val mlpModel = FeedForwardTopology.multiLayerPerceptron(layers, true).model(weights)
+ private val mlpModel = FeedForwardTopology
+ .multiLayerPerceptron(layers, softmaxOnTop = true)
+ .model(weights)
/**
* Returns layers in a Java List.