author Ehsan M.Kermani <ehsanmo1367@gmail.com> 2015-11-05 12:11:57 -0800
committer Xiangrui Meng <meng@databricks.com> 2015-11-05 12:11:57 -0800
commit f80f7b69a3f81d0ea879a31c769d17ffbbac74aa
tree b83859e27c17b0e332742bee59761b40dc344005 /mllib/src/main/scala/org
parent 6b87acd6649a3390b5c2c4fcb61e58d125d0d87c
[SPARK-10265][DOCUMENTATION, ML] Fixed @Since annotation to ml.regression
Here is my first commit.

Author: Ehsan M.Kermani <ehsanmo1367@gmail.com>

Closes #8728 from ehsanmok/SinceAnn.
Diffstat (limited to 'mllib/src/main/scala/org')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 20
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala | 33
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala | 26
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala | 28
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala | 30
5 files changed, 119 insertions(+), 18 deletions(-)
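
For context, `@Since` is Spark's internal annotation for recording the version in which a public API first appeared; Scaladoc surfaces it in the generated API docs. A minimal sketch of how such an annotation can be declared follows; the real definition lives in org.apache.spark.annotation, and the exact meta-annotations here are an assumption, not a quote of the source:

```scala
package org.apache.spark.annotation

import scala.annotation.StaticAnnotation
import scala.annotation.meta.{beanGetter, beanSetter, field, getter, param, setter}

// Sketch: records the Spark version in which an API was added.
// The meta-annotations let it attach to vals, constructor parameters,
// and bean-style accessors, matching how the patch below applies it.
@param @field @getter @setter @beanGetter @beanSetter
private[spark] class Since(version: String) extends StaticAnnotation
```
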
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 88b79a4eb8..04420fc6e8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.regression
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
@@ -36,30 +36,39 @@ import org.apache.spark.sql.DataFrame
* for regression.
* It supports both continuous and categorical features.
*/
+@Since("1.4.0")
@Experimental
-final class DecisionTreeRegressor(override val uid: String)
+final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
with DecisionTreeParams with TreeRegressorParams {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("dtr"))
// Override parameter setters from parent trait for Java API compatibility.
-
+ @Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
+ @Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
+ @Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)
+ @Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
+ @Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
+ @Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
+ @Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
+ @Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)
override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = {
@@ -78,9 +87,11 @@ final class DecisionTreeRegressor(override val uid: String)
subsamplingRate = 1.0)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopy(extra)
}
+@Since("1.4.0")
@Experimental
object DecisionTreeRegressor {
/** Accessor for supported impurities: variance */
@@ -93,6 +104,7 @@ object DecisionTreeRegressor {
* It supports both continuous and categorical features.
* @param rootNode Root of the decision tree
*/
+@Since("1.4.0")
@Experimental
final class DecisionTreeRegressionModel private[ml] (
override val uid: String,
@@ -115,10 +127,12 @@ final class DecisionTreeRegressionModel private[ml] (
rootNode.predictImpl(features).prediction
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): DecisionTreeRegressionModel = {
copyValues(new DecisionTreeRegressionModel(uid, rootNode, numFeatures), extra).setParent(parent)
}
+ @Since("1.4.0")
override def toString: String = {
s"DecisionTreeRegressionModel (uid=$uid) of depth $depth with $numNodes nodes"
}
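
To see the annotated API in use, here is a minimal end-to-end sketch of `DecisionTreeRegressor` as of the Spark 1.4-1.6 era; the toy DataFrame and local master are illustrative assumptions:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.ml.regression.DecisionTreeRegressor

object DecisionTreeRegressorExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("dtr").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // Toy training data: (label, features) -- purely illustrative.
    val training = sc.parallelize(Seq(
      (1.0, Vectors.dense(0.0, 1.1)),
      (0.0, Vectors.dense(2.0, 1.0)),
      (3.0, Vectors.dense(2.0, -1.5))
    )).toDF("label", "features")

    // The setters below are among those annotated @Since("1.4.0") in this patch.
    val dt = new DecisionTreeRegressor()
      .setMaxDepth(3)
      .setMinInstancesPerNode(1)
      .setImpurity("variance")

    val model = dt.fit(training)
    // toString is also annotated: "DecisionTreeRegressionModel (uid=...) of depth ..."
    println(model)
    sc.stop()
  }
}
```
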
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 65b5b3e072..07144cc7cf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.regression
import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
@@ -42,54 +42,65 @@ import org.apache.spark.sql.types.DoubleType
* learning algorithm for regression.
* It supports both continuous and categorical features.
*/
+@Since("1.4.0")
@Experimental
-final class GBTRegressor(override val uid: String)
+final class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
with GBTParams with TreeRegressorParams with Logging {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("gbtr"))
// Override parameter setters from parent trait for Java API compatibility.
// Parameters from TreeRegressorParams:
-
+ @Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
+ @Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
+ @Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)
+ @Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
+ @Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
+ @Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
+ @Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
/**
* The impurity setting is ignored for GBT models.
* Individual trees are built using impurity "Variance."
*/
+ @Since("1.4.0")
override def setImpurity(value: String): this.type = {
logWarning("GBTRegressor.setImpurity should NOT be used")
this
}
// Parameters from TreeEnsembleParams:
-
+ @Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
+ @Since("1.4.0")
override def setSeed(value: Long): this.type = {
logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
super.setSeed(value)
}
// Parameters from GBTParams:
-
+ @Since("1.4.0")
override def setMaxIter(value: Int): this.type = super.setMaxIter(value)
+ @Since("1.4.0")
override def setStepSize(value: Double): this.type = super.setStepSize(value)
// Parameters for GBTRegressor:
@@ -100,6 +111,7 @@ final class GBTRegressor(override val uid: String)
* (default = squared)
* @group param
*/
+ @Since("1.4.0")
val lossType: Param[String] = new Param[String](this, "lossType", "Loss function which GBT" +
" tries to minimize (case-insensitive). Supported options:" +
s" ${GBTRegressor.supportedLossTypes.mkString(", ")}",
@@ -108,9 +120,11 @@ final class GBTRegressor(override val uid: String)
setDefault(lossType -> "squared")
/** @group setParam */
+ @Since("1.4.0")
def setLossType(value: String): this.type = set(lossType, value)
/** @group getParam */
+ @Since("1.4.0")
def getLossType: String = $(lossType).toLowerCase
/** (private[ml]) Convert new loss to old loss. */
@@ -135,13 +149,16 @@ final class GBTRegressor(override val uid: String)
GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures, numFeatures)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressor = defaultCopy(extra)
}
+@Since("1.4.0")
@Experimental
object GBTRegressor {
// The losses below should be lowercase.
/** Accessor for supported loss settings: squared (L2), absolute (L1) */
+ @Since("1.4.0")
final val supportedLossTypes: Array[String] = Array("squared", "absolute").map(_.toLowerCase)
}
@@ -154,6 +171,7 @@ object GBTRegressor {
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
+@Since("1.4.0")
@Experimental
final class GBTRegressionModel private[ml](
override val uid: String,
@@ -172,11 +190,14 @@ final class GBTRegressionModel private[ml](
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
+ @Since("1.4.0")
def this(uid: String, _trees: Array[DecisionTreeRegressionModel], _treeWeights: Array[Double]) =
this(uid, _trees, _treeWeights, -1)
+ @Since("1.4.0")
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
+ @Since("1.4.0")
override def treeWeights: Array[Double] = _treeWeights
override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -194,11 +215,13 @@ final class GBTRegressionModel private[ml](
blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressionModel = {
copyValues(new GBTRegressionModel(uid, _trees, _treeWeights, numFeatures),
extra).setParent(parent)
}
+ @Since("1.4.0")
override def toString: String = {
s"GBTRegressionModel (uid=$uid) with $numTrees trees"
}
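
The GBT diff above also annotates the regressor-specific `lossType` param, whose supported values are "squared" (L2) and "absolute" (L1). A short sketch, assuming a DataFrame `training` with "label" and "features" columns as in the earlier example:

```scala
import org.apache.spark.ml.regression.GBTRegressor

val gbt = new GBTRegressor()
  .setMaxIter(10)          // @Since("1.4.0"), from GBTParams
  .setMaxDepth(3)
  .setLossType("absolute") // must be one of GBTRegressor.supportedLossTypes

val model = gbt.fit(training)
println(model) // "GBTRegressionModel (uid=...) with 10 trees"
```
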
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index f4a17c8f9a..a1fe01b047 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.regression
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol}
@@ -124,32 +124,42 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
*
* Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
+@Since("1.5.0")
@Experimental
-class IsotonicRegression(override val uid: String) extends Estimator[IsotonicRegressionModel]
- with IsotonicRegressionBase {
+class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
+ extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("isoReg"))
/** @group setParam */
+ @Since("1.5.0")
def setLabelCol(value: String): this.type = set(labelCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setIsotonic(value: Boolean): this.type = set(isotonic, value)
/** @group setParam */
+ @Since("1.5.0")
def setWeightCol(value: String): this.type = set(weightCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)
+ @Since("1.5.0")
override def copy(extra: ParamMap): IsotonicRegression = defaultCopy(extra)
+ @Since("1.5.0")
override def fit(dataset: DataFrame): IsotonicRegressionModel = {
validateAndTransformSchema(dataset.schema, fitting = true)
// Extract columns from data. If dataset is persisted, do not persist oldDataset.
@@ -163,6 +173,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
copyValues(new IsotonicRegressionModel(uid, oldModel).setParent(this))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = true)
}
@@ -178,6 +189,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
* @param oldModel A [[org.apache.spark.mllib.regression.IsotonicRegressionModel]]
* model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
+@Since("1.5.0")
@Experimental
class IsotonicRegressionModel private[ml] (
override val uid: String,
@@ -185,27 +197,34 @@ class IsotonicRegressionModel private[ml] (
extends Model[IsotonicRegressionModel] with IsotonicRegressionBase {
/** @group setParam */
+ @Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)
/** Boundaries in increasing order for which predictions are known. */
+ @Since("1.5.0")
def boundaries: Vector = Vectors.dense(oldModel.boundaries)
/**
* Predictions associated with the boundaries at the same index, monotone because of isotonic
* regression.
*/
+ @Since("1.5.0")
def predictions: Vector = Vectors.dense(oldModel.predictions)
+ @Since("1.5.0")
override def copy(extra: ParamMap): IsotonicRegressionModel = {
copyValues(new IsotonicRegressionModel(uid, oldModel), extra).setParent(parent)
}
+ @Since("1.5.0")
override def transform(dataset: DataFrame): DataFrame = {
val predict = dataset.schema($(featuresCol)).dataType match {
case DoubleType =>
@@ -217,6 +236,7 @@ class IsotonicRegressionModel private[ml] (
dataset.withColumn($(predictionCol), predict(col($(featuresCol))))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = false)
}
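
As the `transform` hunk above shows, the isotonic model accepts a plain double-valued feature column. A sketch exercising the newly annotated `boundaries` and `predictions` accessors, reusing `sc` and the implicits from the first example:

```scala
import org.apache.spark.ml.regression.IsotonicRegression

// Single double-valued feature column; labels need not be monotone.
val data = sc.parallelize(
  Seq((1.0, 1.0), (2.0, 2.0), (1.5, 3.0), (4.0, 4.0))
).toDF("label", "features") // "features" is DoubleType here

val ir = new IsotonicRegression().setIsotonic(true)
val model = ir.fit(data)

// The @Since("1.5.0") accessors annotated by this patch:
println(model.boundaries)  // increasing feature boundaries
println(model.predictions) // monotone predictions at those boundaries
```
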
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 6638313818..913140e581 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -24,9 +24,9 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS,
import breeze.stats.distributions.StudentsT
import org.apache.spark.{Logging, SparkException}
-import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.optim.WeightedLeastSquares
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared._
@@ -61,11 +61,13 @@ private[regression] trait LinearRegressionParams extends PredictorParams
* - L1 (Lasso)
* - L2 + L1 (elastic net)
*/
+@Since("1.3.0")
@Experimental
-class LinearRegression(override val uid: String)
+class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String)
extends Regressor[Vector, LinearRegression, LinearRegressionModel]
with LinearRegressionParams with Logging {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("linReg"))
/**
@@ -73,6 +75,7 @@ class LinearRegression(override val uid: String)
* Default is 0.0.
* @group setParam
*/
+ @Since("1.3.0")
def setRegParam(value: Double): this.type = set(regParam, value)
setDefault(regParam -> 0.0)
@@ -81,6 +84,7 @@ class LinearRegression(override val uid: String)
* Default is true.
* @group setParam
*/
+ @Since("1.5.0")
def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
setDefault(fitIntercept -> true)
@@ -93,6 +97,7 @@ class LinearRegression(override val uid: String)
* Default is true.
* @group setParam
*/
+ @Since("1.5.0")
def setStandardization(value: Boolean): this.type = set(standardization, value)
setDefault(standardization -> true)
@@ -103,6 +108,7 @@ class LinearRegression(override val uid: String)
* Default is 0.0 which is an L2 penalty.
* @group setParam
*/
+ @Since("1.4.0")
def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
setDefault(elasticNetParam -> 0.0)
@@ -111,6 +117,7 @@ class LinearRegression(override val uid: String)
* Default is 100.
* @group setParam
*/
+ @Since("1.3.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
setDefault(maxIter -> 100)
@@ -120,6 +127,7 @@ class LinearRegression(override val uid: String)
* Default is 1E-6.
* @group setParam
*/
+ @Since("1.4.0")
def setTol(value: Double): this.type = set(tol, value)
setDefault(tol -> 1E-6)
@@ -129,6 +137,7 @@ class LinearRegression(override val uid: String)
* Default is empty, so all instances have weight one.
* @group setParam
*/
+ @Since("1.6.0")
def setWeightCol(value: String): this.type = set(weightCol, value)
setDefault(weightCol -> "")
@@ -139,6 +148,7 @@ class LinearRegression(override val uid: String)
* selected automatically.
* @group setParam
*/
+ @Since("1.6.0")
def setSolver(value: String): this.type = set(solver, value)
setDefault(solver -> "auto")
@@ -329,6 +339,7 @@ class LinearRegression(override val uid: String)
model.setSummary(trainingSummary)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra)
}
@@ -336,6 +347,7 @@ class LinearRegression(override val uid: String)
* :: Experimental ::
* Model produced by [[LinearRegression]].
*/
+@Since("1.3.0")
@Experimental
class LinearRegressionModel private[ml] (
override val uid: String,
@@ -355,6 +367,7 @@ class LinearRegressionModel private[ml] (
* Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is
* thrown if `trainingSummary == None`.
*/
+ @Since("1.5.0")
def summary: LinearRegressionTrainingSummary = trainingSummary match {
case Some(summ) => summ
case None =>
@@ -369,6 +382,7 @@ class LinearRegressionModel private[ml] (
}
/** Indicates whether a training summary exists for this model instance. */
+ @Since("1.5.0")
def hasSummary: Boolean = trainingSummary.isDefined
/**
@@ -402,6 +416,7 @@ class LinearRegressionModel private[ml] (
dot(features, coefficients) + intercept
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): LinearRegressionModel = {
val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra)
if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
@@ -416,6 +431,7 @@ class LinearRegressionModel private[ml] (
* @param predictions predictions outputted by the model's `transform` method.
* @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
*/
+@Since("1.5.0")
@Experimental
class LinearRegressionTrainingSummary private[regression] (
predictions: DataFrame,
@@ -428,6 +444,7 @@ class LinearRegressionTrainingSummary private[regression] (
extends LinearRegressionSummary(predictions, predictionCol, labelCol, model, diagInvAtWA) {
/** Number of training iterations until termination */
+ @Since("1.5.0")
val totalIterations = objectiveHistory.length
}
@@ -437,6 +454,7 @@ class LinearRegressionTrainingSummary private[regression] (
* Linear regression results evaluated on a dataset.
* @param predictions predictions outputted by the model's `transform` method.
*/
+@Since("1.5.0")
@Experimental
class LinearRegressionSummary private[regression] (
@transient val predictions: DataFrame,
@@ -455,33 +473,39 @@ class LinearRegressionSummary private[regression] (
* explainedVariance = 1 - variance(y - \hat{y}) / variance(y)
* Reference: [[http://en.wikipedia.org/wiki/Explained_variation]]
*/
+ @Since("1.5.0")
val explainedVariance: Double = metrics.explainedVariance
/**
* Returns the mean absolute error, which is a risk function corresponding to the
* expected value of the absolute error loss or l1-norm loss.
*/
+ @Since("1.5.0")
val meanAbsoluteError: Double = metrics.meanAbsoluteError
/**
* Returns the mean squared error, which is a risk function corresponding to the
* expected value of the squared error loss or quadratic loss.
*/
+ @Since("1.5.0")
val meanSquaredError: Double = metrics.meanSquaredError
/**
* Returns the root mean squared error, which is defined as the square root of
* the mean squared error.
*/
+ @Since("1.5.0")
val rootMeanSquaredError: Double = metrics.rootMeanSquaredError
/**
* Returns R^2^, the coefficient of determination.
* Reference: [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
*/
+ @Since("1.5.0")
val r2: Double = metrics.r2
/** Residuals (label - predicted value) */
+ @Since("1.5.0")
@transient lazy val residuals: DataFrame = {
val t = udf { (pred: Double, label: Double) => label - pred }
predictions.select(t(col(predictionCol), col(labelCol)).as("residuals"))
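
A sketch tying together the elastic-net setters and the training-summary accessors annotated above, again assuming a DataFrame `training` with "label" and "features" columns:

```scala
import org.apache.spark.ml.regression.LinearRegression

val lr = new LinearRegression()
  .setMaxIter(100)         // @Since("1.3.0")
  .setRegParam(0.1)        // @Since("1.3.0")
  .setElasticNetParam(0.5) // @Since("1.4.0"): 0.0 = L2 penalty, 1.0 = L1

val model = lr.fit(training)

if (model.hasSummary) {        // @Since("1.5.0")
  val summary = model.summary  // @Since("1.5.0")
  println(summary.rootMeanSquaredError)
  println(summary.r2)
  summary.residuals.show()     // label - predicted value, per row
}
```
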
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index 64fc17247c..71e40b513e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.regression
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeEnsembleModel, TreeRegressorParams}
@@ -37,44 +37,55 @@ import org.apache.spark.sql.functions._
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
* It supports both continuous and categorical features.
*/
+@Since("1.4.0")
@Experimental
-final class RandomForestRegressor(override val uid: String)
+final class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
with RandomForestParams with TreeRegressorParams {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("rfr"))
// Override parameter setters from parent trait for Java API compatibility.
// Parameters from TreeRegressorParams:
-
+ @Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
+ @Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
+ @Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)
+ @Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
+ @Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
+ @Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
+ @Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
+ @Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)
// Parameters from TreeEnsembleParams:
-
+ @Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
+ @Since("1.4.0")
override def setSeed(value: Long): this.type = super.setSeed(value)
// Parameters from RandomForestParams:
-
+ @Since("1.4.0")
override def setNumTrees(value: Int): this.type = super.setNumTrees(value)
+ @Since("1.4.0")
override def setFeatureSubsetStrategy(value: String): this.type =
super.setFeatureSubsetStrategy(value)
@@ -91,15 +102,19 @@ final class RandomForestRegressor(override val uid: String)
new RandomForestRegressionModel(trees, numFeatures)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): RandomForestRegressor = defaultCopy(extra)
}
+@Since("1.4.0")
@Experimental
object RandomForestRegressor {
/** Accessor for supported impurity settings: variance */
+ @Since("1.4.0")
final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities
/** Accessor for supported featureSubsetStrategy settings: auto, all, onethird, sqrt, log2 */
+ @Since("1.4.0")
final val supportedFeatureSubsetStrategies: Array[String] =
RandomForestParams.supportedFeatureSubsetStrategies
}
@@ -111,6 +126,7 @@ object RandomForestRegressor {
* @param _trees Decision trees in the ensemble.
* @param numFeatures Number of features used by this model
*/
+@Since("1.4.0")
@Experimental
final class RandomForestRegressionModel private[ml] (
override val uid: String,
@@ -128,11 +144,13 @@ final class RandomForestRegressionModel private[ml] (
private[ml] def this(trees: Array[DecisionTreeRegressionModel], numFeatures: Int) =
this(Identifiable.randomUID("rfr"), trees, numFeatures)
+ @Since("1.4.0")
override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
// Note: We may add support for weights (based on tree performance) later on.
private lazy val _treeWeights: Array[Double] = Array.fill[Double](numTrees)(1.0)
+ @Since("1.4.0")
override def treeWeights: Array[Double] = _treeWeights
override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -150,10 +168,12 @@ final class RandomForestRegressionModel private[ml] (
_trees.map(_.rootNode.predictImpl(features).prediction).sum / numTrees
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): RandomForestRegressionModel = {
copyValues(new RandomForestRegressionModel(uid, _trees, numFeatures), extra).setParent(parent)
}
+ @Since("1.4.0")
override def toString: String = {
s"RandomForestRegressionModel (uid=$uid) with $numTrees trees"
}
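
Finally, a sketch of the `RandomForestRegressor` setters annotated in this patch, under the same assumption of a `training` DataFrame with "label" and "features" columns:

```scala
import org.apache.spark.ml.regression.RandomForestRegressor

val rf = new RandomForestRegressor()
  .setNumTrees(20)                      // @Since("1.4.0"), from RandomForestParams
  .setFeatureSubsetStrategy("onethird") // see supportedFeatureSubsetStrategies
  .setSubsamplingRate(0.8)
  .setSeed(42L)

val model = rf.fit(training)
println(model) // "RandomForestRegressionModel (uid=...) with 20 trees"
```
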