author    Takahashi Hiroshi <takahashi.hiroshi@lab.ntt.co.jp>  2015-12-07 23:46:55 -0800
committer Xiangrui Meng <meng@databricks.com>                  2015-12-07 23:46:55 -0800
commit    7d05a624510f7299b3dd07f87c203db1ff7caa3e (patch)
tree      2d6c49e6df49199be0984e1b556bf3221c2bb2b9 /mllib/src
parent    73896588dd3af6ba77c9692cd5120ee32448eb22 (diff)
[SPARK-10259][ML] Add @since annotation to ml.classification
Add @since annotation to ml.classification.

Author: Takahashi Hiroshi <takahashi.hiroshi@lab.ntt.co.jp>

Closes #8534 from taishi-oss/issue10259.
Diffstat (limited to 'mllib/src')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala          | 30
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala                  | 35
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala             | 64
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala | 23
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala                     | 19
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala                      | 24
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala         | 34
7 files changed, 185 insertions(+), 44 deletions(-)
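
The change is mechanical but worth spelling out once, since the same pattern repeats across all seven files: a class-level @Since on every public class and object, a @Since on the primary constructor and each public constructor parameter, and a @Since on every public member, each recording the release in which that piece of API first shipped. A minimal sketch of the pattern, assuming Spark's org.apache.spark.annotation package is on the classpath and using a made-up Widget class rather than code from the patch:

import org.apache.spark.annotation.{Experimental, Since}

// Illustrative only: "Widget" is a made-up class, not code from the patch.
// It shows where each @Since lands: class, primary constructor, constructor
// parameter, auxiliary constructor, and individual members.
@Since("1.4.0")
@Experimental
final class Widget @Since("1.4.0") (      // release that added the constructor
    @Since("1.4.0") val uid: String) {    // release that added this member

  @Since("1.4.0")
  def this() = this("widget")

  // APIs introduced after the class itself carry their own, later release.
  @Since("1.6.0")
  def describe: String = s"Widget(uid=$uid)"
}

This is why the versions in the diff below are not uniform: DecisionTreeClassifier is tagged 1.4.0 as a class, while its setSeed override, added later, is tagged 1.6.0.
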
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index c478aea44a..8c4cec1326 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.classification
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeClassifierParams}
import org.apache.spark.ml.tree.impl.RandomForest
@@ -36,32 +36,44 @@ import org.apache.spark.sql.DataFrame
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
+@Since("1.4.0")
@Experimental
-final class DecisionTreeClassifier(override val uid: String)
+final class DecisionTreeClassifier @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String)
extends ProbabilisticClassifier[Vector, DecisionTreeClassifier, DecisionTreeClassificationModel]
with DecisionTreeParams with TreeClassifierParams {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("dtc"))
// Override parameter setters from parent trait for Java API compatibility.
+ @Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
+ @Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
+ @Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)
+ @Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
+ @Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
+ @Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
+ @Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
+ @Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)
+ @Since("1.6.0")
override def setSeed(value: Long): this.type = super.setSeed(value)
override protected def train(dataset: DataFrame): DecisionTreeClassificationModel = {
@@ -89,12 +101,15 @@ final class DecisionTreeClassifier(override val uid: String)
subsamplingRate = 1.0)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): DecisionTreeClassifier = defaultCopy(extra)
}
+@Since("1.4.0")
@Experimental
object DecisionTreeClassifier {
/** Accessor for supported impurities: entropy, gini */
+ @Since("1.4.0")
final val supportedImpurities: Array[String] = TreeClassifierParams.supportedImpurities
}
@@ -104,12 +119,13 @@ object DecisionTreeClassifier {
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
+@Since("1.4.0")
@Experimental
final class DecisionTreeClassificationModel private[ml] (
- override val uid: String,
- override val rootNode: Node,
- override val numFeatures: Int,
- override val numClasses: Int)
+ @Since("1.4.0") override val uid: String,
+ @Since("1.4.0") override val rootNode: Node,
+ @Since("1.6.0") override val numFeatures: Int,
+ @Since("1.5.0") override val numClasses: Int)
extends ProbabilisticClassificationModel[Vector, DecisionTreeClassificationModel]
with DecisionTreeModel with Serializable {
@@ -142,11 +158,13 @@ final class DecisionTreeClassificationModel private[ml] (
}
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): DecisionTreeClassificationModel = {
copyValues(new DecisionTreeClassificationModel(uid, rootNode, numFeatures, numClasses), extra)
.setParent(parent)
}
+ @Since("1.4.0")
override def toString: String = {
s"DecisionTreeClassificationModel (uid=$uid) of depth $depth with $numNodes nodes"
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index 74aef94bf7..cda2bca58c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.classification
import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
@@ -44,36 +44,47 @@ import org.apache.spark.sql.types.DoubleType
* It supports binary labels, as well as both continuous and categorical features.
* Note: Multiclass labels are not currently supported.
*/
+@Since("1.4.0")
@Experimental
-final class GBTClassifier(override val uid: String)
+final class GBTClassifier @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String)
extends Predictor[Vector, GBTClassifier, GBTClassificationModel]
with GBTParams with TreeClassifierParams with Logging {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("gbtc"))
// Override parameter setters from parent trait for Java API compatibility.
// Parameters from TreeClassifierParams:
+ @Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
+ @Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
+ @Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)
+ @Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
+ @Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
+ @Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
+ @Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
/**
* The impurity setting is ignored for GBT models.
* Individual trees are built using impurity "Variance."
*/
+ @Since("1.4.0")
override def setImpurity(value: String): this.type = {
logWarning("GBTClassifier.setImpurity should NOT be used")
this
@@ -81,8 +92,10 @@ final class GBTClassifier(override val uid: String)
// Parameters from TreeEnsembleParams:
+ @Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
+ @Since("1.4.0")
override def setSeed(value: Long): this.type = {
logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
super.setSeed(value)
@@ -90,8 +103,10 @@ final class GBTClassifier(override val uid: String)
// Parameters from GBTParams:
+ @Since("1.4.0")
override def setMaxIter(value: Int): this.type = super.setMaxIter(value)
+ @Since("1.4.0")
override def setStepSize(value: Double): this.type = super.setStepSize(value)
// Parameters for GBTClassifier:
@@ -102,6 +117,7 @@ final class GBTClassifier(override val uid: String)
* (default = logistic)
* @group param
*/
+ @Since("1.4.0")
val lossType: Param[String] = new Param[String](this, "lossType", "Loss function which GBT" +
" tries to minimize (case-insensitive). Supported options:" +
s" ${GBTClassifier.supportedLossTypes.mkString(", ")}",
@@ -110,9 +126,11 @@ final class GBTClassifier(override val uid: String)
setDefault(lossType -> "logistic")
/** @group setParam */
+ @Since("1.4.0")
def setLossType(value: String): this.type = set(lossType, value)
/** @group getParam */
+ @Since("1.4.0")
def getLossType: String = $(lossType).toLowerCase
/** (private[ml]) Convert new loss to old loss. */
@@ -145,13 +163,16 @@ final class GBTClassifier(override val uid: String)
GBTClassificationModel.fromOld(oldModel, this, categoricalFeatures, numFeatures)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): GBTClassifier = defaultCopy(extra)
}
+@Since("1.4.0")
@Experimental
object GBTClassifier {
// The losses below should be lowercase.
/** Accessor for supported loss settings: logistic */
+ @Since("1.4.0")
final val supportedLossTypes: Array[String] = Array("logistic").map(_.toLowerCase)
}
@@ -164,12 +185,13 @@ object GBTClassifier {
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
+@Since("1.6.0")
@Experimental
final class GBTClassificationModel private[ml](
- override val uid: String,
+ @Since("1.6.0") override val uid: String,
private val _trees: Array[DecisionTreeRegressionModel],
private val _treeWeights: Array[Double],
- override val numFeatures: Int)
+ @Since("1.6.0") override val numFeatures: Int)
extends PredictionModel[Vector, GBTClassificationModel]
with TreeEnsembleModel with Serializable {
@@ -182,11 +204,14 @@ final class GBTClassificationModel private[ml](
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
+ @Since("1.6.0")
def this(uid: String, _trees: Array[DecisionTreeRegressionModel], _treeWeights: Array[Double]) =
this(uid, _trees, _treeWeights, -1)
+ @Since("1.4.0")
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
+ @Since("1.4.0")
override def treeWeights: Array[Double] = _treeWeights
override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -205,11 +230,13 @@ final class GBTClassificationModel private[ml](
if (prediction > 0.0) 1.0 else 0.0
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): GBTClassificationModel = {
copyValues(new GBTClassificationModel(uid, _trees, _treeWeights, numFeatures),
extra).setParent(parent)
}
+ @Since("1.4.0")
override def toString: String = {
s"GBTClassificationModel (uid=$uid) with $numTrees trees"
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index d320d64dd9..19cc323d50 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -24,7 +24,7 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS,
import org.apache.hadoop.fs.Path
import org.apache.spark.{Logging, SparkException}
-import org.apache.spark.annotation.{Since, Experimental}
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
@@ -154,11 +154,14 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
* Currently, this class only supports binary classification. It will support multiclass
* in the future.
*/
+@Since("1.2.0")
@Experimental
-class LogisticRegression(override val uid: String)
+class LogisticRegression @Since("1.2.0") (
+ @Since("1.4.0") override val uid: String)
extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel]
with LogisticRegressionParams with DefaultParamsWritable with Logging {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("logreg"))
/**
@@ -166,6 +169,7 @@ class LogisticRegression(override val uid: String)
* Default is 0.0.
* @group setParam
*/
+ @Since("1.2.0")
def setRegParam(value: Double): this.type = set(regParam, value)
setDefault(regParam -> 0.0)
@@ -176,6 +180,7 @@ class LogisticRegression(override val uid: String)
* Default is 0.0 which is an L2 penalty.
* @group setParam
*/
+ @Since("1.4.0")
def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
setDefault(elasticNetParam -> 0.0)
@@ -184,6 +189,7 @@ class LogisticRegression(override val uid: String)
* Default is 100.
* @group setParam
*/
+ @Since("1.2.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
setDefault(maxIter -> 100)
@@ -193,6 +199,7 @@ class LogisticRegression(override val uid: String)
* Default is 1E-6.
* @group setParam
*/
+ @Since("1.4.0")
def setTol(value: Double): this.type = set(tol, value)
setDefault(tol -> 1E-6)
@@ -201,6 +208,7 @@ class LogisticRegression(override val uid: String)
* Default is true.
* @group setParam
*/
+ @Since("1.4.0")
def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
setDefault(fitIntercept -> true)
@@ -213,11 +221,14 @@ class LogisticRegression(override val uid: String)
* Default is true.
* @group setParam
*/
+ @Since("1.5.0")
def setStandardization(value: Boolean): this.type = set(standardization, value)
setDefault(standardization -> true)
+ @Since("1.5.0")
override def setThreshold(value: Double): this.type = super.setThreshold(value)
+ @Since("1.5.0")
override def getThreshold: Double = super.getThreshold
/**
@@ -226,11 +237,14 @@ class LogisticRegression(override val uid: String)
* Default is empty, so all instances have weight one.
* @group setParam
*/
+ @Since("1.6.0")
def setWeightCol(value: String): this.type = set(weightCol, value)
setDefault(weightCol -> "")
+ @Since("1.5.0")
override def setThresholds(value: Array[Double]): this.type = super.setThresholds(value)
+ @Since("1.5.0")
override def getThresholds: Array[Double] = super.getThresholds
override protected def train(dataset: DataFrame): LogisticRegressionModel = {
@@ -384,11 +398,14 @@ class LogisticRegression(override val uid: String)
model.setSummary(logRegSummary)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): LogisticRegression = defaultCopy(extra)
}
+@Since("1.6.0")
object LogisticRegression extends DefaultParamsReadable[LogisticRegression] {
+ @Since("1.6.0")
override def load(path: String): LogisticRegression = super.load(path)
}
@@ -396,23 +413,28 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] {
* :: Experimental ::
* Model produced by [[LogisticRegression]].
*/
+@Since("1.4.0")
@Experimental
class LogisticRegressionModel private[ml] (
- override val uid: String,
- val coefficients: Vector,
- val intercept: Double)
+ @Since("1.4.0") override val uid: String,
+ @Since("1.6.0") val coefficients: Vector,
+ @Since("1.3.0") val intercept: Double)
extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel]
with LogisticRegressionParams with MLWritable {
@deprecated("Use coefficients instead.", "1.6.0")
def weights: Vector = coefficients
+ @Since("1.5.0")
override def setThreshold(value: Double): this.type = super.setThreshold(value)
+ @Since("1.5.0")
override def getThreshold: Double = super.getThreshold
+ @Since("1.5.0")
override def setThresholds(value: Array[Double]): this.type = super.setThresholds(value)
+ @Since("1.5.0")
override def getThresholds: Array[Double] = super.getThresholds
/** Margin (rawPrediction) for class label 1. For binary classification only. */
@@ -426,8 +448,10 @@ class LogisticRegressionModel private[ml] (
1.0 / (1.0 + math.exp(-m))
}
+ @Since("1.6.0")
override val numFeatures: Int = coefficients.size
+ @Since("1.3.0")
override val numClasses: Int = 2
private var trainingSummary: Option[LogisticRegressionTrainingSummary] = None
@@ -436,6 +460,7 @@ class LogisticRegressionModel private[ml] (
* Gets summary of model on training set. An exception is
* thrown if `trainingSummary == None`.
*/
+ @Since("1.5.0")
def summary: LogisticRegressionTrainingSummary = trainingSummary match {
case Some(summ) => summ
case None =>
@@ -451,6 +476,7 @@ class LogisticRegressionModel private[ml] (
}
/** Indicates whether a training summary exists for this model instance. */
+ @Since("1.5.0")
def hasSummary: Boolean = trainingSummary.isDefined
/**
@@ -493,6 +519,7 @@ class LogisticRegressionModel private[ml] (
Vectors.dense(-m, m)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): LogisticRegressionModel = {
val newModel = copyValues(new LogisticRegressionModel(uid, coefficients, intercept), extra)
if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
@@ -710,12 +737,13 @@ sealed trait LogisticRegressionSummary extends Serializable {
* @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
*/
@Experimental
+@Since("1.5.0")
class BinaryLogisticRegressionTrainingSummary private[classification] (
- predictions: DataFrame,
- probabilityCol: String,
- labelCol: String,
- featuresCol: String,
- val objectiveHistory: Array[Double])
+ @Since("1.5.0") predictions: DataFrame,
+ @Since("1.5.0") probabilityCol: String,
+ @Since("1.5.0") labelCol: String,
+ @Since("1.6.0") featuresCol: String,
+ @Since("1.5.0") val objectiveHistory: Array[Double])
extends BinaryLogisticRegressionSummary(predictions, probabilityCol, labelCol, featuresCol)
with LogisticRegressionTrainingSummary {
@@ -731,11 +759,13 @@ class BinaryLogisticRegressionTrainingSummary private[classification] (
* @param featuresCol field in "predictions" which gives the features of each instance as a vector.
*/
@Experimental
+@Since("1.5.0")
class BinaryLogisticRegressionSummary private[classification] (
- @transient override val predictions: DataFrame,
- override val probabilityCol: String,
- override val labelCol: String,
- override val featuresCol: String) extends LogisticRegressionSummary {
+ @Since("1.5.0") @transient override val predictions: DataFrame,
+ @Since("1.5.0") override val probabilityCol: String,
+ @Since("1.5.0") override val labelCol: String,
+ @Since("1.6.0") override val featuresCol: String) extends LogisticRegressionSummary {
+
private val sqlContext = predictions.sqlContext
import sqlContext.implicits._
@@ -760,6 +790,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* This will change in later Spark versions.
* @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
*/
+ @Since("1.5.0")
@transient lazy val roc: DataFrame = binaryMetrics.roc().toDF("FPR", "TPR")
/**
@@ -768,6 +799,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
* This will change in later Spark versions.
*/
+ @Since("1.5.0")
lazy val areaUnderROC: Double = binaryMetrics.areaUnderROC()
/**
@@ -777,6 +809,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
* This will change in later Spark versions.
*/
+ @Since("1.5.0")
@transient lazy val pr: DataFrame = binaryMetrics.pr().toDF("recall", "precision")
/**
@@ -785,6 +818,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
* This will change in later Spark versions.
*/
+ @Since("1.5.0")
@transient lazy val fMeasureByThreshold: DataFrame = {
binaryMetrics.fMeasureByThreshold().toDF("threshold", "F-Measure")
}
@@ -797,6 +831,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
* This will change in later Spark versions.
*/
+ @Since("1.5.0")
@transient lazy val precisionByThreshold: DataFrame = {
binaryMetrics.precisionByThreshold().toDF("threshold", "precision")
}
@@ -809,6 +844,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
* This will change in later Spark versions.
*/
+ @Since("1.5.0")
@transient lazy val recallByThreshold: DataFrame = {
binaryMetrics.recallByThreshold().toDF("threshold", "recall")
}
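
The LogisticRegression hunks above annotate the training-summary API (summary, hasSummary, roc, areaUnderROC, pr, and the per-threshold metrics). For orientation, a hedged usage sketch against the 1.6-era API; the training DataFrame and the surrounding method are assumptions, not code from the patch:

import org.apache.spark.ml.classification.{BinaryLogisticRegressionSummary, LogisticRegression}
import org.apache.spark.sql.DataFrame

// "training" is an assumed DataFrame with the default "label" and
// "features" columns; nothing here comes from the patch itself.
def inspectModel(training: DataFrame): Unit = {
  val model = new LogisticRegression()
    .setMaxIter(100)         // annotated @Since("1.2.0") above
    .setRegParam(0.1)        // annotated @Since("1.2.0") above
    .setElasticNetParam(0.5) // annotated @Since("1.4.0") above
    .fit(training)

  // The summary only exists when the model came from fit(), so guard on it.
  if (model.hasSummary) {
    // LogisticRegression is binary-only at this point, so the downcast
    // mirrors the examples in the Spark docs of this era.
    val summary = model.summary.asInstanceOf[BinaryLogisticRegressionSummary]
    println(s"areaUnderROC = ${summary.areaUnderROC}")
    summary.roc.show() // DataFrame with "FPR" and "TPR" columns
  }
}
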
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
index cd7462596d..a691aa005e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.classification
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param.shared.{HasTol, HasMaxIter, HasSeed}
import org.apache.spark.ml.{PredictorParams, PredictionModel, Predictor}
import org.apache.spark.ml.param.{IntParam, ParamValidators, IntArrayParam, ParamMap}
@@ -104,19 +104,23 @@ private object LabelConverter {
* Each layer has sigmoid activation function, output layer has softmax.
* Number of inputs has to be equal to the size of feature vectors.
* Number of outputs has to be equal to the total number of labels.
- *
*/
+@Since("1.5.0")
@Experimental
-class MultilayerPerceptronClassifier(override val uid: String)
+class MultilayerPerceptronClassifier @Since("1.5.0") (
+ @Since("1.5.0") override val uid: String)
extends Predictor[Vector, MultilayerPerceptronClassifier, MultilayerPerceptronClassificationModel]
with MultilayerPerceptronParams {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("mlpc"))
/** @group setParam */
+ @Since("1.5.0")
def setLayers(value: Array[Int]): this.type = set(layers, value)
/** @group setParam */
+ @Since("1.5.0")
def setBlockSize(value: Int): this.type = set(blockSize, value)
/**
@@ -124,6 +128,7 @@ class MultilayerPerceptronClassifier(override val uid: String)
* Default is 100.
* @group setParam
*/
+ @Since("1.5.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
/**
@@ -132,14 +137,17 @@ class MultilayerPerceptronClassifier(override val uid: String)
* Default is 1E-4.
* @group setParam
*/
+ @Since("1.5.0")
def setTol(value: Double): this.type = set(tol, value)
/**
* Set the seed for weights initialization.
* @group setParam
*/
+ @Since("1.5.0")
def setSeed(value: Long): this.type = set(seed, value)
+ @Since("1.5.0")
override def copy(extra: ParamMap): MultilayerPerceptronClassifier = defaultCopy(extra)
/**
@@ -173,14 +181,16 @@ class MultilayerPerceptronClassifier(override val uid: String)
* @param weights vector of initial weights for the model that consists of the weights of layers
* @return prediction model
*/
+@Since("1.5.0")
@Experimental
class MultilayerPerceptronClassificationModel private[ml] (
- override val uid: String,
- val layers: Array[Int],
- val weights: Vector)
+ @Since("1.5.0") override val uid: String,
+ @Since("1.5.0") val layers: Array[Int],
+ @Since("1.5.0") val weights: Vector)
extends PredictionModel[Vector, MultilayerPerceptronClassificationModel]
with Serializable {
+ @Since("1.6.0")
override val numFeatures: Int = layers.head
private val mlpModel = FeedForwardTopology.multiLayerPerceptron(layers, true).getInstance(weights)
@@ -200,6 +210,7 @@ class MultilayerPerceptronClassificationModel private[ml] (
LabelConverter.decodeLabel(mlpModel.predict(features))
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): MultilayerPerceptronClassificationModel = {
copyValues(new MultilayerPerceptronClassificationModel(uid, layers, weights), extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
index c512a2cb8b..718f49d3ae 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
@@ -72,11 +72,14 @@ private[ml] trait NaiveBayesParams extends PredictorParams {
* ([[http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html]]).
* The input feature values must be nonnegative.
*/
+@Since("1.5.0")
@Experimental
-class NaiveBayes(override val uid: String)
+class NaiveBayes @Since("1.5.0") (
+ @Since("1.5.0") override val uid: String)
extends ProbabilisticClassifier[Vector, NaiveBayes, NaiveBayesModel]
with NaiveBayesParams with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("nb"))
/**
@@ -84,6 +87,7 @@ class NaiveBayes(override val uid: String)
* Default is 1.0.
* @group setParam
*/
+ @Since("1.5.0")
def setSmoothing(value: Double): this.type = set(smoothing, value)
setDefault(smoothing -> 1.0)
@@ -93,6 +97,7 @@ class NaiveBayes(override val uid: String)
* Default is "multinomial"
* @group setParam
*/
+ @Since("1.5.0")
def setModelType(value: String): this.type = set(modelType, value)
setDefault(modelType -> OldNaiveBayes.Multinomial)
@@ -102,6 +107,7 @@ class NaiveBayes(override val uid: String)
NaiveBayesModel.fromOld(oldModel, this)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): NaiveBayes = defaultCopy(extra)
}
@@ -119,11 +125,12 @@ object NaiveBayes extends DefaultParamsReadable[NaiveBayes] {
* @param theta log of class conditional probabilities, whose dimension is C (number of classes)
* by D (number of features)
*/
+@Since("1.5.0")
@Experimental
class NaiveBayesModel private[ml] (
- override val uid: String,
- val pi: Vector,
- val theta: Matrix)
+ @Since("1.5.0") override val uid: String,
+ @Since("1.5.0") val pi: Vector,
+ @Since("1.5.0") val theta: Matrix)
extends ProbabilisticClassificationModel[Vector, NaiveBayesModel]
with NaiveBayesParams with MLWritable {
@@ -148,8 +155,10 @@ class NaiveBayesModel private[ml] (
throw new UnknownError(s"Invalid modelType: ${$(modelType)}.")
}
+ @Since("1.6.0")
override val numFeatures: Int = theta.numCols
+ @Since("1.5.0")
override val numClasses: Int = pi.size
private def multinomialCalculation(features: Vector) = {
@@ -206,10 +215,12 @@ class NaiveBayesModel private[ml] (
}
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): NaiveBayesModel = {
copyValues(new NaiveBayesModel(uid, pi, theta).setParent(this.parent), extra)
}
+ @Since("1.5.0")
override def toString: String = {
s"NaiveBayesModel (uid=$uid) with ${pi.size} classes"
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index debc164bf2..08a51109d6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -21,7 +21,7 @@ import java.util.UUID
import scala.language.existentials
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml._
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.param.{Param, ParamMap}
@@ -70,17 +70,20 @@ private[ml] trait OneVsRestParams extends PredictorParams {
* The i-th model is produced by testing the i-th class (taking label 1) vs the rest
* (taking label 0).
*/
+@Since("1.4.0")
@Experimental
final class OneVsRestModel private[ml] (
- override val uid: String,
- labelMetadata: Metadata,
- val models: Array[_ <: ClassificationModel[_, _]])
+ @Since("1.4.0") override val uid: String,
+ @Since("1.4.0") labelMetadata: Metadata,
+ @Since("1.4.0") val models: Array[_ <: ClassificationModel[_, _]])
extends Model[OneVsRestModel] with OneVsRestParams {
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = false, getClassifier.featuresDataType)
}
+ @Since("1.4.0")
override def transform(dataset: DataFrame): DataFrame = {
// Check schema
transformSchema(dataset.schema, logging = true)
@@ -134,6 +137,7 @@ final class OneVsRestModel private[ml] (
.drop(accColName)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): OneVsRestModel = {
val copied = new OneVsRestModel(
uid, labelMetadata, models.map(_.copy(extra).asInstanceOf[ClassificationModel[_, _]]))
@@ -150,30 +154,39 @@ final class OneVsRestModel private[ml] (
* Each example is scored against all k models and the model with highest score
* is picked to label the example.
*/
+@Since("1.4.0")
@Experimental
-final class OneVsRest(override val uid: String)
+final class OneVsRest @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String)
extends Estimator[OneVsRestModel] with OneVsRestParams {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("oneVsRest"))
/** @group setParam */
+ @Since("1.4.0")
def setClassifier(value: Classifier[_, _, _]): this.type = {
set(classifier, value.asInstanceOf[ClassifierType])
}
/** @group setParam */
+ @Since("1.5.0")
def setLabelCol(value: String): this.type = set(labelCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value)
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = true, getClassifier.featuresDataType)
}
+ @Since("1.4.0")
override def fit(dataset: DataFrame): OneVsRestModel = {
// determine number of classes either from metadata if provided, or via computation.
val labelSchema = dataset.schema($(labelCol))
@@ -222,6 +235,7 @@ final class OneVsRest(override val uid: String)
copyValues(model)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): OneVsRest = {
val copied = defaultCopy(extra).asInstanceOf[OneVsRest]
if (isDefined(classifier)) {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index bae329692a..d6d85ad253 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.classification
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.tree.impl.RandomForest
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeClassifierParams, TreeEnsembleModel}
@@ -38,44 +38,59 @@ import org.apache.spark.sql.functions._
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
+@Since("1.4.0")
@Experimental
-final class RandomForestClassifier(override val uid: String)
+final class RandomForestClassifier @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String)
extends ProbabilisticClassifier[Vector, RandomForestClassifier, RandomForestClassificationModel]
with RandomForestParams with TreeClassifierParams {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("rfc"))
// Override parameter setters from parent trait for Java API compatibility.
// Parameters from TreeClassifierParams:
+ @Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
+ @Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
+ @Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)
+ @Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
+ @Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
+ @Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
+ @Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
+ @Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)
// Parameters from TreeEnsembleParams:
+ @Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
+ @Since("1.4.0")
override def setSeed(value: Long): this.type = super.setSeed(value)
// Parameters from RandomForestParams:
+ @Since("1.4.0")
override def setNumTrees(value: Int): this.type = super.setNumTrees(value)
+ @Since("1.4.0")
override def setFeatureSubsetStrategy(value: String): this.type =
super.setFeatureSubsetStrategy(value)
@@ -99,15 +114,19 @@ final class RandomForestClassifier(override val uid: String)
new RandomForestClassificationModel(trees, numFeatures, numClasses)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): RandomForestClassifier = defaultCopy(extra)
}
+@Since("1.4.0")
@Experimental
object RandomForestClassifier {
/** Accessor for supported impurity settings: entropy, gini */
+ @Since("1.4.0")
final val supportedImpurities: Array[String] = TreeClassifierParams.supportedImpurities
/** Accessor for supported featureSubsetStrategy settings: auto, all, onethird, sqrt, log2 */
+ @Since("1.4.0")
final val supportedFeatureSubsetStrategies: Array[String] =
RandomForestParams.supportedFeatureSubsetStrategies
}
@@ -120,12 +139,13 @@ object RandomForestClassifier {
* @param _trees Decision trees in the ensemble.
* Warning: These have null parents.
*/
+@Since("1.4.0")
@Experimental
final class RandomForestClassificationModel private[ml] (
- override val uid: String,
+ @Since("1.5.0") override val uid: String,
private val _trees: Array[DecisionTreeClassificationModel],
- override val numFeatures: Int,
- override val numClasses: Int)
+ @Since("1.6.0") override val numFeatures: Int,
+ @Since("1.5.0") override val numClasses: Int)
extends ProbabilisticClassificationModel[Vector, RandomForestClassificationModel]
with TreeEnsembleModel with Serializable {
@@ -141,11 +161,13 @@ final class RandomForestClassificationModel private[ml] (
numClasses: Int) =
this(Identifiable.randomUID("rfc"), trees, numFeatures, numClasses)
+ @Since("1.4.0")
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
// Note: We may add support for weights (based on tree performance) later on.
private lazy val _treeWeights: Array[Double] = Array.fill[Double](numTrees)(1.0)
+ @Since("1.4.0")
override def treeWeights: Array[Double] = _treeWeights
override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -186,11 +208,13 @@ final class RandomForestClassificationModel private[ml] (
}
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): RandomForestClassificationModel = {
copyValues(new RandomForestClassificationModel(uid, _trees, numFeatures, numClasses), extra)
.setParent(parent)
}
+ @Since("1.4.0")
override def toString: String = {
s"RandomForestClassificationModel (uid=$uid) with $numTrees trees"
}
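
One last note on the "// Override parameter setters from parent trait for Java API compatibility." comment that recurs in DecisionTreeClassifier, GBTClassifier, and RandomForestClassifier: a Scala method returning this.type erases, in bytecode, to the type of the trait that declares it, so without these re-declarations Java callers chaining setters would get the parameter trait back instead of the concrete classifier. A minimal sketch of the idea, with made-up names:

// Made-up names; this only illustrates the "Java API compatibility" rationale.
trait HasMaxDepth {
  protected var maxDepthValue: Int = 5

  // Declared in a trait, the erased return type Java sees here is HasMaxDepth.
  def setMaxDepth(value: Int): this.type = { maxDepthValue = value; this }
}

final class ExampleClassifier extends HasMaxDepth {
  // Re-declared on the concrete class, the erased return type becomes
  // ExampleClassifier, so Java callers can keep chaining concrete setters.
  override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
}

object ExampleClassifierDemo {
  def main(args: Array[String]): Unit = {
    // Chains with the concrete type from Scala and, thanks to the override,
    // from Java as well.
    val clf = new ExampleClassifier().setMaxDepth(8)
    println(clf.getClass.getSimpleName)
  }
}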