aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorYu ISHIKAWA <yuu.ishikawa@gmail.com>2015-12-02 14:15:54 -0800
committerXiangrui Meng <meng@databricks.com>2015-12-02 14:15:54 -0800
commitde07d06abecf3516c95d099b6c01a86e0c8cfd8c (patch)
treeeb3a3235e107b4e33c0a157b12f75fa618279d1f /mllib
parent452690ba1cc3c667bdd9f3022c43c9a10267880b (diff)
downloadspark-de07d06abecf3516c95d099b6c01a86e0c8cfd8c.tar.gz
spark-de07d06abecf3516c95d099b6c01a86e0c8cfd8c.tar.bz2
spark-de07d06abecf3516c95d099b6c01a86e0c8cfd8c.zip
[SPARK-10266][DOCUMENTATION, ML] Fixed @Since annotation for ml.tuning
cc mengxr noel-smith I worked on this issue based on https://github.com/apache/spark/pull/8729. ehsanmok thank you for your contribution! Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> Author: Ehsan M.Kermani <ehsanmo1367@gmail.com> Closes #9338 from yu-iskw/JIRA-10266.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala34
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala14
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala26
3 files changed, 58 insertions, 16 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index 83a9048374..5c09f1aaff 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -19,18 +19,18 @@ package org.apache.spark.ml.tuning
import com.github.fommil.netlib.F2jBLAS
import org.apache.hadoop.fs.Path
-import org.json4s.{JObject, DefaultFormats}
import org.json4s.jackson.JsonMethods._
+import org.json4s.{DefaultFormats, JObject}
-import org.apache.spark.ml.classification.OneVsRestParams
-import org.apache.spark.ml.feature.RFormulaModel
-import org.apache.spark.{SparkContext, Logging}
+import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml._
+import org.apache.spark.ml.classification.OneVsRestParams
import org.apache.spark.ml.evaluation.Evaluator
+import org.apache.spark.ml.feature.RFormulaModel
import org.apache.spark.ml.param._
-import org.apache.spark.ml.util._
import org.apache.spark.ml.util.DefaultParamsReader.Metadata
+import org.apache.spark.ml.util._
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.StructType
@@ -58,26 +58,34 @@ private[ml] trait CrossValidatorParams extends ValidatorParams {
* :: Experimental ::
* K-fold cross validation.
*/
+@Since("1.2.0")
@Experimental
-class CrossValidator(override val uid: String) extends Estimator[CrossValidatorModel]
+class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String)
+ extends Estimator[CrossValidatorModel]
with CrossValidatorParams with MLWritable with Logging {
+ @Since("1.2.0")
def this() = this(Identifiable.randomUID("cv"))
private val f2jBLAS = new F2jBLAS
/** @group setParam */
+ @Since("1.2.0")
def setEstimator(value: Estimator[_]): this.type = set(estimator, value)
/** @group setParam */
+ @Since("1.2.0")
def setEstimatorParamMaps(value: Array[ParamMap]): this.type = set(estimatorParamMaps, value)
/** @group setParam */
+ @Since("1.2.0")
def setEvaluator(value: Evaluator): this.type = set(evaluator, value)
/** @group setParam */
+ @Since("1.2.0")
def setNumFolds(value: Int): this.type = set(numFolds, value)
+ @Since("1.4.0")
override def fit(dataset: DataFrame): CrossValidatorModel = {
val schema = dataset.schema
transformSchema(schema, logging = true)
@@ -116,10 +124,12 @@ class CrossValidator(override val uid: String) extends Estimator[CrossValidatorM
copyValues(new CrossValidatorModel(uid, bestModel, metrics).setParent(this))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
$(estimator).transformSchema(schema)
}
+ @Since("1.4.0")
override def validateParams(): Unit = {
super.validateParams()
val est = $(estimator)
@@ -128,6 +138,7 @@ class CrossValidator(override val uid: String) extends Estimator[CrossValidatorM
}
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): CrossValidator = {
val copied = defaultCopy(extra).asInstanceOf[CrossValidator]
if (copied.isDefined(estimator)) {
@@ -308,26 +319,31 @@ object CrossValidator extends MLReadable[CrossValidator] {
* @param avgMetrics Average cross-validation metrics for each paramMap in
* [[CrossValidator.estimatorParamMaps]], in the corresponding order.
*/
+@Since("1.2.0")
@Experimental
class CrossValidatorModel private[ml] (
- override val uid: String,
- val bestModel: Model[_],
- val avgMetrics: Array[Double])
+ @Since("1.4.0") override val uid: String,
+ @Since("1.2.0") val bestModel: Model[_],
+ @Since("1.5.0") val avgMetrics: Array[Double])
extends Model[CrossValidatorModel] with CrossValidatorParams with MLWritable {
+ @Since("1.4.0")
override def validateParams(): Unit = {
bestModel.validateParams()
}
+ @Since("1.4.0")
override def transform(dataset: DataFrame): DataFrame = {
transformSchema(dataset.schema, logging = true)
bestModel.transform(dataset)
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
bestModel.transformSchema(schema)
}
+ @Since("1.4.0")
override def copy(extra: ParamMap): CrossValidatorModel = {
val copied = new CrossValidatorModel(
uid,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
index 98a8f0330c..b836d2a234 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
@@ -20,21 +20,23 @@ package org.apache.spark.ml.tuning
import scala.annotation.varargs
import scala.collection.mutable
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.param._
/**
* :: Experimental ::
* Builder for a param grid used in grid search-based model selection.
*/
+@Since("1.2.0")
@Experimental
-class ParamGridBuilder {
+class ParamGridBuilder @Since("1.2.0") {
private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]
/**
* Sets the given parameters in this grid to fixed values.
*/
+ @Since("1.2.0")
def baseOn(paramMap: ParamMap): this.type = {
baseOn(paramMap.toSeq: _*)
this
@@ -43,6 +45,7 @@ class ParamGridBuilder {
/**
* Sets the given parameters in this grid to fixed values.
*/
+ @Since("1.2.0")
@varargs
def baseOn(paramPairs: ParamPair[_]*): this.type = {
paramPairs.foreach { p =>
@@ -54,6 +57,7 @@ class ParamGridBuilder {
/**
* Adds a param with multiple values (overwrites if the input param exists).
*/
+ @Since("1.2.0")
def addGrid[T](param: Param[T], values: Iterable[T]): this.type = {
paramGrid.put(param, values)
this
@@ -64,6 +68,7 @@ class ParamGridBuilder {
/**
* Adds a double param with multiple values.
*/
+ @Since("1.2.0")
def addGrid(param: DoubleParam, values: Array[Double]): this.type = {
addGrid[Double](param, values)
}
@@ -71,6 +76,7 @@ class ParamGridBuilder {
/**
* Adds a int param with multiple values.
*/
+ @Since("1.2.0")
def addGrid(param: IntParam, values: Array[Int]): this.type = {
addGrid[Int](param, values)
}
@@ -78,6 +84,7 @@ class ParamGridBuilder {
/**
* Adds a float param with multiple values.
*/
+ @Since("1.2.0")
def addGrid(param: FloatParam, values: Array[Float]): this.type = {
addGrid[Float](param, values)
}
@@ -85,6 +92,7 @@ class ParamGridBuilder {
/**
* Adds a long param with multiple values.
*/
+ @Since("1.2.0")
def addGrid(param: LongParam, values: Array[Long]): this.type = {
addGrid[Long](param, values)
}
@@ -92,6 +100,7 @@ class ParamGridBuilder {
/**
* Adds a boolean param with true and false.
*/
+ @Since("1.2.0")
def addGrid(param: BooleanParam): this.type = {
addGrid[Boolean](param, Array(true, false))
}
@@ -99,6 +108,7 @@ class ParamGridBuilder {
/**
* Builds and returns all combinations of parameters specified by the param grid.
*/
+ @Since("1.2.0")
def build(): Array[ParamMap] = {
var paramMaps = Array(new ParamMap)
paramGrid.foreach { case (param, values) =>
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
index 73a14b8310..adf0630204 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.tuning
import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.evaluation.Evaluator
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param.{DoubleParam, ParamMap, ParamValidators}
@@ -51,24 +51,32 @@ private[ml] trait TrainValidationSplitParams extends ValidatorParams {
* and uses evaluation metric on the validation set to select the best model.
* Similar to [[CrossValidator]], but only splits the set once.
*/
+@Since("1.5.0")
@Experimental
-class TrainValidationSplit(override val uid: String) extends Estimator[TrainValidationSplitModel]
+class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: String)
+ extends Estimator[TrainValidationSplitModel]
with TrainValidationSplitParams with Logging {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("tvs"))
/** @group setParam */
+ @Since("1.5.0")
def setEstimator(value: Estimator[_]): this.type = set(estimator, value)
/** @group setParam */
+ @Since("1.5.0")
def setEstimatorParamMaps(value: Array[ParamMap]): this.type = set(estimatorParamMaps, value)
/** @group setParam */
+ @Since("1.5.0")
def setEvaluator(value: Evaluator): this.type = set(evaluator, value)
/** @group setParam */
+ @Since("1.5.0")
def setTrainRatio(value: Double): this.type = set(trainRatio, value)
+ @Since("1.5.0")
override def fit(dataset: DataFrame): TrainValidationSplitModel = {
val schema = dataset.schema
transformSchema(schema, logging = true)
@@ -108,10 +116,12 @@ class TrainValidationSplit(override val uid: String) extends Estimator[TrainVali
copyValues(new TrainValidationSplitModel(uid, bestModel, metrics).setParent(this))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
$(estimator).transformSchema(schema)
}
+ @Since("1.5.0")
override def validateParams(): Unit = {
super.validateParams()
val est = $(estimator)
@@ -120,6 +130,7 @@ class TrainValidationSplit(override val uid: String) extends Estimator[TrainVali
}
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): TrainValidationSplit = {
val copied = defaultCopy(extra).asInstanceOf[TrainValidationSplit]
if (copied.isDefined(estimator)) {
@@ -140,26 +151,31 @@ class TrainValidationSplit(override val uid: String) extends Estimator[TrainVali
* @param bestModel Estimator determined best model.
* @param validationMetrics Evaluated validation metrics.
*/
+@Since("1.5.0")
@Experimental
class TrainValidationSplitModel private[ml] (
- override val uid: String,
- val bestModel: Model[_],
- val validationMetrics: Array[Double])
+ @Since("1.5.0") override val uid: String,
+ @Since("1.5.0") val bestModel: Model[_],
+ @Since("1.5.0") val validationMetrics: Array[Double])
extends Model[TrainValidationSplitModel] with TrainValidationSplitParams {
+ @Since("1.5.0")
override def validateParams(): Unit = {
bestModel.validateParams()
}
+ @Since("1.5.0")
override def transform(dataset: DataFrame): DataFrame = {
transformSchema(dataset.schema, logging = true)
bestModel.transform(dataset)
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
bestModel.transformSchema(schema)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): TrainValidationSplitModel = {
val copied = new TrainValidationSplitModel (
uid,