author     Xiangrui Meng <meng@databricks.com>  2015-05-26 15:51:31 -0700
committer  Xiangrui Meng <meng@databricks.com>  2015-05-26 15:51:31 -0700
commit     836a75898fdc4b10d4d00676ef29e24cc96f09fd (patch)
tree       ce1cd5109cdfa80e70fa3cfbe0eb2a83c83bd3c7 /mllib
parent     9f742241cbf07e5e2dadfee8dcc9b382bb2dbea1 (diff)
download   spark-836a75898fdc4b10d4d00676ef29e24cc96f09fd.tar.gz
           spark-836a75898fdc4b10d4d00676ef29e24cc96f09fd.tar.bz2
           spark-836a75898fdc4b10d4d00676ef29e24cc96f09fd.zip
[SPARK-7748] [MLLIB] Graduate spark.ml from alpha
With decent coverage of feature transformers, algorithms, and model tuning support, it is time to graduate `spark.ml` from alpha. This PR changes all `AlphaComponent` annotations to either `DeveloperApi` or `Experimental`, depending on whether we expect a class/method to be used by end users (who use the pipeline API to assemble/tune their ML pipelines but not to create new pipeline components). `UnaryTransformer` becomes a `DeveloperApi` in this PR.

jkbradley harsha2010

Author: Xiangrui Meng <meng@databricks.com>

Closes #6417 from mengxr/SPARK-7748 and squashes the following commits:

effbccd [Xiangrui Meng] organize imports
c15028e [Xiangrui Meng] added missing docs
1b2e5f8 [Xiangrui Meng] update package doc
73ca791 [Xiangrui Meng] alpha -> ex/dev for the rest
93819db [Xiangrui Meng] alpha -> ex/dev in ml.param
55ca073 [Xiangrui Meng] alpha -> ex/dev in ml.feature
83572f1 [Xiangrui Meng] add Experimental and DeveloperApi tags (wip)
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/Estimator.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/Model.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 14
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/Predictor.scala | 3
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/Transformer.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala | 9
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala | 5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala | 29
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala | 15
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala | 15
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala | 19
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala | 15
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 12
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/package-info.java | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/package.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/param/params.scala | 59
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala | 14
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 15
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala | 14
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala | 12
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala | 15
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala | 9
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala | 6
43 files changed, 267 insertions, 201 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala b/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala
index 9e16e60270..e9a5d7c0e7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala
@@ -19,15 +19,15 @@ package org.apache.spark.ml
import scala.annotation.varargs
-import org.apache.spark.annotation.AlphaComponent
-import org.apache.spark.ml.param.{ParamMap, ParamPair, Params}
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.ml.param.{ParamMap, ParamPair}
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
+ * :: DeveloperApi ::
* Abstract class for estimators that fit models to data.
*/
-@AlphaComponent
+@DeveloperApi
abstract class Estimator[M <: Model[M]] extends PipelineStage {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Model.scala b/mllib/src/main/scala/org/apache/spark/ml/Model.scala
index 70e7495ac6..186bf7ae7a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Model.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Model.scala
@@ -17,16 +17,16 @@
package org.apache.spark.ml
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param.ParamMap
/**
- * :: AlphaComponent ::
+ * :: DeveloperApi ::
* A fitted model, i.e., a [[Transformer]] produced by an [[Estimator]].
*
* @tparam M model type
*/
-@AlphaComponent
+@DeveloperApi
abstract class Model[M <: Model[M]] extends Transformer {
/**
* The parent estimator that produced this model.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index 43bee1b770..9da3ff65c7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -20,17 +20,17 @@ package org.apache.spark.ml
import scala.collection.mutable.ListBuffer
import org.apache.spark.Logging
-import org.apache.spark.annotation.{AlphaComponent, DeveloperApi}
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
import org.apache.spark.ml.param.{Param, ParamMap, Params}
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.StructType
/**
- * :: AlphaComponent ::
+ * :: DeveloperApi ::
* A stage in a pipeline, either an [[Estimator]] or a [[Transformer]].
*/
-@AlphaComponent
+@DeveloperApi
abstract class PipelineStage extends Params with Logging {
/**
@@ -69,7 +69,7 @@ abstract class PipelineStage extends Params with Logging {
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each
* of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline#fit]] is called, the
* stages are executed in order. If a stage is an [[Estimator]], its [[Estimator#fit]] method will
@@ -80,7 +80,7 @@ abstract class PipelineStage extends Params with Logging {
* transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as
* an identity transformer.
*/
-@AlphaComponent
+@Experimental
class Pipeline(override val uid: String) extends Estimator[PipelineModel] {
def this() = this(Identifiable.randomUID("pipeline"))
@@ -169,10 +169,10 @@ class Pipeline(override val uid: String) extends Estimator[PipelineModel] {
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Represents a fitted pipeline.
*/
-@AlphaComponent
+@Experimental
class PipelineModel private[ml] (
override val uid: String,
val stages: Array[Transformer])
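
As context for the new Experimental tag, a minimal end-to-end sketch of the Pipeline API (not part of this patch; a SQLContext `sqlContext` in scope and the column names are assumptions):

  import org.apache.spark.ml.Pipeline
  import org.apache.spark.ml.classification.LogisticRegression
  import org.apache.spark.ml.feature.{HashingTF, Tokenizer}

  // Hypothetical training data: (id, text, label).
  val training = sqlContext.createDataFrame(Seq(
    (0L, "a b c d e spark", 1.0),
    (1L, "b d", 0.0)
  )).toDF("id", "text", "label")

  // Three stages: tokenize text, hash terms into feature vectors, fit LR.
  val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
  val hashingTF = new HashingTF()
    .setInputCol(tokenizer.getOutputCol).setOutputCol("features")
  val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.01)
  val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))

  // fit() runs the stages in order; the result is a PipelineModel whose
  // stages are all transformers.
  val model = pipeline.fit(training)
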
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
index ec0f76aa66..e752b81a14 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
@@ -58,7 +58,6 @@ private[ml] trait PredictorParams extends Params
/**
* :: DeveloperApi ::
- *
* Abstraction for prediction problems (regression and classification).
*
* @tparam FeaturesType Type of features.
@@ -113,7 +112,6 @@ abstract class Predictor[
*
* The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector.
*/
- @DeveloperApi
private[ml] def featuresDataType: DataType = new VectorUDT
override def transformSchema(schema: StructType): StructType = {
@@ -134,7 +132,6 @@ abstract class Predictor[
/**
* :: DeveloperApi ::
- *
* Abstraction for a model for prediction tasks (regression and classification).
*
* @tparam FeaturesType Type of features.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
index 38bb6a5a53..f07f733a5d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml
import scala.annotation.varargs
import org.apache.spark.Logging
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.sql.DataFrame
@@ -28,10 +28,10 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
/**
- * :: AlphaComponent ::
+ * :: DeveloperApi ::
* Abstract class for transformers that transform one dataset into another.
*/
-@AlphaComponent
+@DeveloperApi
abstract class Transformer extends PipelineStage {
/**
@@ -73,10 +73,12 @@ abstract class Transformer extends PipelineStage {
}
/**
+ * :: DeveloperApi ::
* Abstract class for transformers that take one input column, apply transformation, and output the
* result as a new column.
*/
-private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
+@DeveloperApi
+abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]]
extends Transformer with HasInputCol with HasOutputCol with Logging {
/** @group setParam */
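
Because `UnaryTransformer` is now a public `DeveloperApi`, users can write one-column transformers themselves. A minimal sketch (the `UpperCaser` class and its uid prefix are invented for illustration):

  import org.apache.spark.ml.UnaryTransformer
  import org.apache.spark.ml.util.Identifiable
  import org.apache.spark.sql.types.{DataType, StringType}

  // Hypothetical transformer that upper-cases a string column.
  class UpperCaser(override val uid: String)
    extends UnaryTransformer[String, String, UpperCaser] {

    def this() = this(Identifiable.randomUID("upperCaser"))

    // The function applied to every value of the input column.
    override protected def createTransformFunc: String => String = _.toUpperCase

    // Fail fast during schema validation if the input column is not a string.
    override protected def validateInputType(inputType: DataType): Unit =
      require(inputType == StringType, s"Input type must be StringType but got $inputType.")

    override protected def outputDataType: DataType = StringType
  }
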
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala
index f5f37aa779..457c15830f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala
@@ -19,10 +19,12 @@ package org.apache.spark.ml.attribute
import scala.collection.mutable.ArrayBuffer
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.VectorUDT
import org.apache.spark.sql.types.{Metadata, MetadataBuilder, StructField}
/**
+ * :: DeveloperApi ::
* Attributes that describe a vector ML column.
*
* @param name name of the attribute group (the ML column name)
@@ -31,6 +33,7 @@ import org.apache.spark.sql.types.{Metadata, MetadataBuilder, StructField}
* @param attrs optional array of attributes. Attribute will be copied with their corresponding
* indices in the array.
*/
+@DeveloperApi
class AttributeGroup private (
val name: String,
val numAttributes: Option[Int],
@@ -182,7 +185,11 @@ class AttributeGroup private (
}
}
-/** Factory methods to create attribute groups. */
+/**
+ * :: DeveloperApi ::
+ * Factory methods to create attribute groups.
+ */
+@DeveloperApi
object AttributeGroup {
import AttributeKeys._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
index a83febd7de..5c7089b491 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
@@ -17,12 +17,17 @@
package org.apache.spark.ml.attribute
+import org.apache.spark.annotation.DeveloperApi
+
/**
+ * :: DeveloperApi ::
* An enum-like type for attribute types: [[AttributeType$#Numeric]], [[AttributeType$#Nominal]],
* and [[AttributeType$#Binary]].
*/
+@DeveloperApi
sealed abstract class AttributeType(val name: String)
+@DeveloperApi
object AttributeType {
/** Numeric type. */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
index e8f7f15278..ce43a450da 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
@@ -19,11 +19,14 @@ package org.apache.spark.ml.attribute
import scala.annotation.varargs
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.types.{DoubleType, Metadata, MetadataBuilder, StructField}
/**
+ * :: DeveloperApi ::
* Abstract class for ML attributes.
*/
+@DeveloperApi
sealed abstract class Attribute extends Serializable {
name.foreach { n =>
@@ -135,6 +138,10 @@ private[attribute] trait AttributeFactory {
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
object Attribute extends AttributeFactory {
private[attribute] override def fromMetadata(metadata: Metadata): Attribute = {
@@ -163,6 +170,7 @@ object Attribute extends AttributeFactory {
/**
+ * :: DeveloperApi ::
* A numeric attribute with optional summary statistics.
* @param name optional name
* @param index optional index
@@ -171,6 +179,7 @@ object Attribute extends AttributeFactory {
* @param std optional standard deviation
* @param sparsity optional sparsity (ratio of zeros)
*/
+@DeveloperApi
class NumericAttribute private[ml] (
override val name: Option[String] = None,
override val index: Option[Int] = None,
@@ -278,8 +287,10 @@ class NumericAttribute private[ml] (
}
/**
+ * :: DeveloperApi ::
* Factory methods for numeric attributes.
*/
+@DeveloperApi
object NumericAttribute extends AttributeFactory {
/** The default numeric attribute. */
@@ -298,6 +309,7 @@ object NumericAttribute extends AttributeFactory {
}
/**
+ * :: DeveloperApi ::
* A nominal attribute.
* @param name optional name
* @param index optional index
@@ -306,6 +318,7 @@ object NumericAttribute extends AttributeFactory {
* defined.
* @param values optional values. At most one of `numValues` and `values` can be defined.
*/
+@DeveloperApi
class NominalAttribute private[ml] (
override val name: Option[String] = None,
override val index: Option[Int] = None,
@@ -430,7 +443,11 @@ class NominalAttribute private[ml] (
}
}
-/** Factory methods for nominal attributes. */
+/**
+ * :: DeveloperApi ::
+ * Factory methods for nominal attributes.
+ */
+@DeveloperApi
object NominalAttribute extends AttributeFactory {
/** The default nominal attribute. */
@@ -450,11 +467,13 @@ object NominalAttribute extends AttributeFactory {
}
/**
+ * :: DeveloperApi ::
* A binary attribute.
* @param name optional name
* @param index optional index
 * @param values optional values. If set, its size must be 2.
*/
+@DeveloperApi
class BinaryAttribute private[ml] (
override val name: Option[String] = None,
override val index: Option[Int] = None,
@@ -526,7 +545,11 @@ class BinaryAttribute private[ml] (
}
}
-/** Factory methods for binary attributes. */
+/**
+ * :: DeveloperApi ::
+ * Factory methods for binary attributes.
+ */
+@DeveloperApi
object BinaryAttribute extends AttributeFactory {
/** The default binary attribute. */
@@ -543,8 +566,10 @@ object BinaryAttribute extends AttributeFactory {
}
/**
+ * :: DeveloperApi ::
* An unresolved attribute.
*/
+@DeveloperApi
object UnresolvedAttribute extends Attribute {
override def attrType: AttributeType = AttributeType.Unresolved
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 7c961332bf..8030e0728a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -17,10 +17,10 @@
package org.apache.spark.ml.classification
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
-import org.apache.spark.ml.tree.{TreeClassifierParams, DecisionTreeParams, DecisionTreeModel, Node}
+import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeClassifierParams}
import org.apache.spark.ml.util.{Identifiable, MetadataUtils}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -31,14 +31,13 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm
* for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
-@AlphaComponent
+@Experimental
final class DecisionTreeClassifier(override val uid: String)
extends Predictor[Vector, DecisionTreeClassifier, DecisionTreeClassificationModel]
with DecisionTreeParams with TreeClassifierParams {
@@ -89,19 +88,19 @@ final class DecisionTreeClassifier(override val uid: String)
}
}
+@Experimental
object DecisionTreeClassifier {
/** Accessor for supported impurities: entropy, gini */
final val supportedImpurities: Array[String] = TreeClassifierParams.supportedImpurities
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
-@AlphaComponent
+@Experimental
final class DecisionTreeClassificationModel private[ml] (
override val uid: String,
override val rootNode: Node)
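
A fit sketch for the now-Experimental tree API (assumptions: a DataFrame `data` with "label" and "features" columns; the label is indexed first because the classifier reads the number of classes from nominal column metadata, which StringIndexer attaches):

  import org.apache.spark.ml.{Pipeline, PipelineStage}
  import org.apache.spark.ml.classification.DecisionTreeClassifier
  import org.apache.spark.ml.feature.StringIndexer

  // Index raw labels so the label column carries numClasses metadata.
  val labelIndexer = new StringIndexer()
    .setInputCol("label").setOutputCol("indexedLabel")
  val dt = new DecisionTreeClassifier()
    .setLabelCol("indexedLabel").setFeaturesCol("features")
    .setMaxDepth(5).setImpurity("gini")

  val model = new Pipeline()
    .setStages(Array[PipelineStage](labelIndexer, dt))
    .fit(data)
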
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index d504d84beb..d8592eb2d9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -20,11 +20,11 @@ package org.apache.spark.ml.classification
import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.spark.Logging
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
-import org.apache.spark.ml.tree.{GBTParams, TreeClassifierParams, DecisionTreeModel, TreeEnsembleModel}
+import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeClassifierParams, TreeEnsembleModel}
import org.apache.spark.ml.util.{Identifiable, MetadataUtils}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -36,14 +36,13 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* learning algorithm for classification.
* It supports binary labels, as well as both continuous and categorical features.
* Note: Multiclass labels are not currently supported.
*/
-@AlphaComponent
+@Experimental
final class GBTClassifier(override val uid: String)
extends Predictor[Vector, GBTClassifier, GBTClassificationModel]
with GBTParams with TreeClassifierParams with Logging {
@@ -144,6 +143,7 @@ final class GBTClassifier(override val uid: String)
}
}
+@Experimental
object GBTClassifier {
// The losses below should be lowercase.
/** Accessor for supported loss settings: logistic */
@@ -151,8 +151,7 @@ object GBTClassifier {
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* model for classification.
* It supports binary labels, as well as both continuous and categorical features.
@@ -160,7 +159,7 @@ object GBTClassifier {
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
-@AlphaComponent
+@Experimental
final class GBTClassificationModel(
override val uid: String,
private val _trees: Array[DecisionTreeRegressionModel],
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 8694c96e4c..d13109d9da 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -19,11 +19,11 @@ package org.apache.spark.ml.classification
import scala.collection.mutable
-import breeze.linalg.{norm => brzNorm, DenseVector => BDV}
-import breeze.optimize.{LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN}
-import breeze.optimize.{CachedDiffFunction, DiffFunction}
+import breeze.linalg.{DenseVector => BDV, norm => brzNorm}
+import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN}
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.{Logging, SparkException}
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.ml.util.Identifiable
@@ -35,7 +35,6 @@ import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
import org.apache.spark.storage.StorageLevel
-import org.apache.spark.{SparkException, Logging}
/**
* Params for logistic regression.
@@ -45,12 +44,11 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
with HasThreshold
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Logistic regression.
* Currently, this class only supports binary classification.
*/
-@AlphaComponent
+@Experimental
class LogisticRegression(override val uid: String)
extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel]
with LogisticRegressionParams with Logging {
@@ -221,11 +219,10 @@ class LogisticRegression(override val uid: String)
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Model produced by [[LogisticRegression]].
*/
-@AlphaComponent
+@Experimental
class LogisticRegressionModel private[ml] (
override val uid: String,
val weights: Vector,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 1543f051cc..36735cd834 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -21,7 +21,7 @@ import java.util.UUID
import scala.language.existentials
-import org.apache.spark.annotation.{AlphaComponent, Experimental}
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml._
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.param.Param
@@ -54,8 +54,7 @@ private[ml] trait OneVsRestParams extends PredictorParams {
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Model produced by [[OneVsRest]].
* This stores the models resulting from training k binary classifiers: one for each class.
* Each example is scored against all k models, and the model with the highest score
@@ -67,7 +66,7 @@ private[ml] trait OneVsRestParams extends PredictorParams {
* The i-th model is produced by testing the i-th class (taking label 1) vs the rest
* (taking label 0).
*/
-@AlphaComponent
+@Experimental
final class OneVsRestModel private[ml] (
override val uid: String,
labelMetadata: Metadata,
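
A usage sketch of the reduction (hedged: `train` and `test` DataFrames with a 0-based numeric "label" column and a "features" vector column are assumptions):

  import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}

  // One binary LogisticRegression model is trained per class; at prediction
  // time each example is scored against all k models.
  val ovr = new OneVsRest().setClassifier(new LogisticRegression().setMaxIter(10))
  val ovrModel = ovr.fit(train)
  val scored = ovrModel.transform(test)  // picks the class with the highest score
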
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index a1de791985..67600ebd7b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -19,10 +19,10 @@ package org.apache.spark.ml.classification
import scala.collection.mutable
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
-import org.apache.spark.ml.tree.{RandomForestParams, TreeClassifierParams, DecisionTreeModel, TreeEnsembleModel}
+import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeClassifierParams, TreeEnsembleModel}
import org.apache.spark.ml.util.{Identifiable, MetadataUtils}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -33,14 +33,13 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for
* classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
-@AlphaComponent
+@Experimental
final class RandomForestClassifier(override val uid: String)
extends Predictor[Vector, RandomForestClassifier, RandomForestClassificationModel]
with RandomForestParams with TreeClassifierParams {
@@ -100,6 +99,7 @@ final class RandomForestClassifier(override val uid: String)
}
}
+@Experimental
object RandomForestClassifier {
/** Accessor for supported impurity settings: entropy, gini */
final val supportedImpurities: Array[String] = TreeClassifierParams.supportedImpurities
@@ -110,15 +110,14 @@ object RandomForestClassifier {
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
* @param _trees Decision trees in the ensemble.
* Warning: These have null parents.
*/
-@AlphaComponent
+@Experimental
final class RandomForestClassificationModel private[ml] (
override val uid: String,
private val _trees: Array[DecisionTreeClassificationModel])
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
index ddbdd00ceb..f695ddaeef 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
@@ -17,8 +17,7 @@
package org.apache.spark.ml.evaluation
-import org.apache.spark.annotation.AlphaComponent
-import org.apache.spark.ml.evaluation.Evaluator
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
@@ -28,11 +27,10 @@ import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.types.DoubleType
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Evaluator for binary classification, which expects two input columns: score and label.
*/
-@AlphaComponent
+@Experimental
class BinaryClassificationEvaluator(override val uid: String)
extends Evaluator with HasRawPredictionCol with HasLabelCol {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
index cabd1c97c0..61e937e693 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala
@@ -17,15 +17,15 @@
package org.apache.spark.ml.evaluation
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param.{ParamMap, Params}
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
+ * :: DeveloperApi ::
* Abstract class for evaluators that compute metrics from predictions.
*/
-@AlphaComponent
+@DeveloperApi
abstract class Evaluator extends Params {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
index 80458928c5..1771177e1e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param.{Param, ParamValidators}
import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol}
import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
@@ -26,11 +26,10 @@ import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.types.DoubleType
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Evaluator for regression, which expects two input columns: prediction and label.
*/
-@AlphaComponent
+@Experimental
final class RegressionEvaluator(override val uid: String)
extends Evaluator with HasPredictionCol with HasLabelCol {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
index 62f4a63434..b06122d733 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.BinaryAttribute
import org.apache.spark.ml.param._
@@ -28,10 +28,10 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{DoubleType, StructType}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Binarize a column of continuous features given a threshold.
*/
-@AlphaComponent
+@Experimental
final class Binarizer(override val uid: String)
extends Transformer with HasInputCol with HasOutputCol {
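
A minimal usage sketch (a DataFrame `df` with a numeric "feature" column is an assumption):

  import org.apache.spark.ml.feature.Binarizer

  // Values greater than the threshold map to 1.0, the rest to 0.0.
  val binarizer = new Binarizer()
    .setInputCol("feature").setOutputCol("binarized_feature")
    .setThreshold(0.5)
  val binarized = binarizer.transform(df)
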
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index ac8dfb5632..a3d1f6f65c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import java.{util => ju}
import org.apache.spark.SparkException
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.Model
import org.apache.spark.ml.attribute.NominalAttribute
import org.apache.spark.ml.param._
@@ -31,10 +31,10 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* `Bucketizer` maps a column of continuous features to a column of feature buckets.
*/
-@AlphaComponent
+@Experimental
final class Bucketizer(override val uid: String)
extends Model[Bucketizer] with HasInputCol with HasOutputCol {
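
A minimal usage sketch (a DataFrame `df` with a numeric "features" column is an assumption; n+1 split points define n buckets):

  import org.apache.spark.ml.feature.Bucketizer

  // Four split points -> three buckets; infinities catch out-of-range values.
  val bucketizer = new Bucketizer()
    .setInputCol("features").setOutputCol("bucketedFeatures")
    .setSplits(Array(Double.NegativeInfinity, 0.0, 10.0, Double.PositiveInfinity))
  val bucketed = bucketizer.transform(df)
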
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
index 8b32eee0e4..3ae1833390 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.util.Identifiable
@@ -26,12 +26,12 @@ import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
import org.apache.spark.sql.types.DataType
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a
* provided "weight" vector. In other words, it scales each column of the dataset by a scalar
* multiplier.
*/
-@AlphaComponent
+@Experimental
class ElementwiseProduct(override val uid: String)
extends UnaryTransformer[Vector, Vector, ElementwiseProduct] {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
index 8942d45219..f936aef80f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
@@ -17,22 +17,22 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
-import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.param.{IntParam, ParamValidators}
+import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
import org.apache.spark.mllib.feature
import org.apache.spark.sql.DataFrame
-import org.apache.spark.sql.functions.{udf, col}
+import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.{ArrayType, StructType}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Maps a sequence of terms to their term frequencies using the hashing trick.
*/
-@AlphaComponent
+@Experimental
class HashingTF(override val uid: String) extends Transformer with HasInputCol with HasOutputCol {
def this() = this(Identifiable.randomUID("hashingTF"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 788c392050..376b84530c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml._
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
@@ -58,10 +58,10 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Compute the Inverse Document Frequency (IDF) given a collection of documents.
*/
-@AlphaComponent
+@Experimental
final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase {
def this() = this(Identifiable.randomUID("idf"))
@@ -85,10 +85,10 @@ final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBa
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Model fitted by [[IDF]].
*/
-@AlphaComponent
+@Experimental
class IDFModel private[ml] (
override val uid: String,
idfModel: feature.IDFModel)
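
HashingTF and IDF compose into the usual TF-IDF sketch (a DataFrame `docs` with a "text" column is an assumption):

  import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer}

  // Hash terms into term-frequency vectors, then rescale by IDF.
  val words = new Tokenizer().setInputCol("text").setOutputCol("words").transform(docs)
  val tf = new HashingTF()
    .setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(1 << 18)
    .transform(words)
  val idfModel = new IDF().setInputCol("rawFeatures").setOutputCol("features").fit(tf)
  val tfidf = idfModel.transform(tf)
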
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
index 3f689d1585..8282e5ffa1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
import org.apache.spark.ml.util.Identifiable
@@ -26,10 +26,10 @@ import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
import org.apache.spark.sql.types.DataType
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Normalize a vector to have unit norm using the given p-norm.
*/
-@AlphaComponent
+@Experimental
class Normalizer(override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] {
def this() = this(Identifiable.randomUID("normalizer"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
index 1fb9b9ae75..eb6ec49f85 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
@@ -18,16 +18,17 @@
package org.apache.spark.ml.feature
import org.apache.spark.SparkException
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.attribute.{Attribute, BinaryAttribute, NominalAttribute}
-import org.apache.spark.mllib.linalg.{Vector, Vectors, VectorUDT}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
+import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors}
import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
/**
+ * :: Experimental ::
* A one-hot encoder that maps a column of label indices to a column of binary vectors, with
* at most a single one-value. By default, the binary vector has an element for each category, so
* with 5 categories, an input value of 2.0 would map to an output vector of
@@ -36,7 +37,7 @@ import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
* of 0.0 would map to a vector of all zeros. Including the first category makes the vector columns
* linearly dependent because they sum up to one.
*/
-@AlphaComponent
+@Experimental
class OneHotEncoder(override val uid: String)
extends UnaryTransformer[Double, Vector, OneHotEncoder] with HasInputCol with HasOutputCol {
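
A usage sketch: the encoder consumes label indices, so categorical strings are typically indexed first (a DataFrame `df` with a "category" column is an assumption):

  import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}

  // StringIndexer assigns 0-based indices by descending label frequency.
  val indexed = new StringIndexer()
    .setInputCol("category").setOutputCol("categoryIndex")
    .fit(df).transform(df)
  // Each index becomes a binary vector with at most a single one-value.
  val encoded = new OneHotEncoder()
    .setInputCol("categoryIndex").setOutputCol("categoryVec")
    .transform(indexed)
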
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 8ddf9d6a1e..442e958202 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature
import scala.collection.mutable
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.{IntParam, ParamValidators}
import org.apache.spark.ml.util.Identifiable
@@ -27,14 +27,14 @@ import org.apache.spark.mllib.linalg._
import org.apache.spark.sql.types.DataType
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion,
* which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an
* expansion of a product of sums expresses it as a sum of products by using the fact that
* multiplication distributes over addition". Take a 2-variable feature vector as an example:
* `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.
*/
-@AlphaComponent
+@Experimental
class PolynomialExpansion(override val uid: String)
extends UnaryTransformer[Vector, Vector, PolynomialExpansion] {
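
A usage sketch of the expansion described above (a DataFrame `df` with a vector "features" column is an assumption):

  import org.apache.spark.ml.feature.PolynomialExpansion

  // With degree 2, a vector (x, y) expands to (x, x*x, y, x*y, y*y).
  val polyExpansion = new PolynomialExpansion()
    .setInputCol("features").setOutputCol("polyFeatures")
    .setDegree(2)
  val expanded = polyExpansion.transform(df)
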
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 5ccda15d87..fdd2494fc8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml._
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
@@ -51,11 +51,11 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Standardizes features by removing the mean and scaling to unit variance using column summary
* statistics on the samples in the training set.
*/
-@AlphaComponent
+@Experimental
class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel]
with StandardScalerParams {
@@ -95,10 +95,10 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Model fitted by [[StandardScaler]].
*/
-@AlphaComponent
+@Experimental
class StandardScalerModel private[ml] (
override val uid: String,
scaler: feature.StandardScalerModel)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 3f79b67309..a2dc8a8b96 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.feature
import org.apache.spark.SparkException
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.attribute.NominalAttribute
import org.apache.spark.ml.param._
@@ -52,13 +52,13 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* A label indexer that maps a string column of labels to an ML column of label indices.
* If the input column is numeric, we cast it to string and index the string values.
* The indices are in [0, numLabels), ordered by label frequencies.
* So the most frequent label gets index 0.
*/
-@AlphaComponent
+@Experimental
class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel]
with StringIndexerBase {
@@ -86,10 +86,10 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Model fitted by [[StringIndexer]].
*/
-@AlphaComponent
+@Experimental
class StringIndexerModel private[ml] (
override val uid: String,
labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 31f3a1aa4c..21c15b6c33 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -17,19 +17,19 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param._
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* A tokenizer that converts the input string to lowercase and then splits it by white spaces.
*
* @see [[RegexTokenizer]]
*/
-@AlphaComponent
+@Experimental
class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] {
def this() = this(Identifiable.randomUID("tok"))
@@ -46,13 +46,13 @@ class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[S
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* A regex based tokenizer that extracts tokens either by using the provided regex pattern to split
* the text (default) or repeatedly matching the regex (if `gaps` is true).
* Optional parameters also allow filtering tokens using a minimal length.
* It returns an array of strings that can be empty.
*/
-@AlphaComponent
+@Experimental
class RegexTokenizer(override val uid: String)
extends UnaryTransformer[String, Seq[String], RegexTokenizer] {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index 181b62f46f..514ffb03c0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import scala.collection.mutable.ArrayBuilder
import org.apache.spark.SparkException
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.shared._
import org.apache.spark.ml.util.Identifiable
@@ -30,10 +30,10 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* A feature transformer that merges multiple columns into a vector column.
*/
-@AlphaComponent
+@Experimental
class VectorAssembler(override val uid: String)
extends Transformer with HasInputCols with HasOutputCol {
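
A usage sketch (the column names are hypothetical; numeric and vector columns may be mixed):

  import org.apache.spark.ml.feature.VectorAssembler

  // Concatenate scalar and vector columns into the single "features" column
  // that spark.ml predictors expect.
  val assembler = new VectorAssembler()
    .setInputCols(Array("hour", "mobile", "userFeatures"))
    .setOutputCol("features")
  val assembled = assembler.transform(df)
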
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index e238fb310e..1d0f23b4fb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -22,7 +22,7 @@ import java.util.{Map => JMap}
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.attribute._
import org.apache.spark.ml.param.{IntParam, ParamValidators, Params}
@@ -56,8 +56,7 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Class for indexing categorical feature columns in a dataset of [[Vector]].
*
* This has 2 usage modes:
@@ -91,7 +90,7 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
* - Add warning if a categorical feature has only 1 category.
* - Add option for allowing unknown categories.
*/
-@AlphaComponent
+@Experimental
class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel]
with VectorIndexerParams {
@@ -230,8 +229,7 @@ private object VectorIndexer {
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Transform categorical features to use 0-based indices instead of their original values.
* - Categorical features are mapped to indices.
* - Continuous features (columns) are left unchanged.
@@ -246,7 +244,7 @@ private object VectorIndexer {
* Values are maps from original features values to 0-based category indices.
* If a feature is not in this map, it is treated as continuous.
*/
-@AlphaComponent
+@Experimental
class VectorIndexerModel private[ml] (
override val uid: String,
val numFeatures: Int,
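
A usage sketch of the first mode (a DataFrame `data` with a vector "features" column is an assumption):

  import org.apache.spark.ml.feature.VectorIndexer

  // Features with at most maxCategories distinct values are treated as
  // categorical and re-indexed; all other features are left continuous.
  val indexer = new VectorIndexer()
    .setInputCol("features").setOutputCol("indexedFeatures")
    .setMaxCategories(10)
  val indexedData = indexer.fit(data).transform(data)
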
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index ed03266922..36f19509f0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
@@ -82,11 +82,11 @@ private[feature] trait Word2VecBase extends Params
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further
* natural language processing or machine learning process.
*/
-@AlphaComponent
+@Experimental
final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase {
def this() = this(Identifiable.randomUID("w2v"))
@@ -135,10 +135,10 @@ final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel]
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Model fitted by [[Word2Vec]].
*/
-@AlphaComponent
+@Experimental
class Word2VecModel private[ml] (
override val uid: String,
wordVectors: feature.Word2VecModel)
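
A usage sketch (a DataFrame `docs` with a "words" column of Seq[String] is an assumption):

  import org.apache.spark.ml.feature.Word2Vec

  // Learns one vector per word; the model then averages the word vectors
  // of each document to produce the output column.
  val word2Vec = new Word2Vec()
    .setInputCol("words").setOutputCol("result")
    .setVectorSize(100).setMinCount(5)
  val w2vModel = word2Vec.fit(docs)
  val vectors = w2vModel.transform(docs)
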
diff --git a/mllib/src/main/scala/org/apache/spark/ml/package-info.java b/mllib/src/main/scala/org/apache/spark/ml/package-info.java
index 00d9c802e9..87f4223964 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/package-info.java
+++ b/mllib/src/main/scala/org/apache/spark/ml/package-info.java
@@ -16,10 +16,10 @@
*/
/**
- * Spark ML is an ALPHA component that adds a new set of machine learning APIs to let users quickly
+ * Spark ML is a BETA component that adds a new set of machine learning APIs to let users quickly
* assemble and configure practical machine learning pipelines.
*/
-@AlphaComponent
+@Experimental
package org.apache.spark.ml;
-import org.apache.spark.annotation.AlphaComponent;
+import org.apache.spark.annotation.Experimental;
diff --git a/mllib/src/main/scala/org/apache/spark/ml/package.scala b/mllib/src/main/scala/org/apache/spark/ml/package.scala
index ac75e9de1a..c589d06d9f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/package.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/package.scala
@@ -18,7 +18,7 @@
package org.apache.spark
/**
- * Spark ML is an ALPHA component that adds a new set of machine learning APIs to let users quickly
+ * Spark ML is a BETA component that adds a new set of machine learning APIs to let users quickly
* assemble and configure practical machine learning pipelines.
*
* @groupname param Parameters
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index 12fc5b561f..1afa59c994 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -24,11 +24,11 @@ import scala.annotation.varargs
import scala.collection.mutable
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.{DeveloperApi, AlphaComponent}
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
import org.apache.spark.ml.util.Identifiable
/**
- * :: AlphaComponent ::
+ * :: DeveloperApi ::
* A param with self-contained documentation and optionally default value. Primitive-typed param
* should use the specialized versions, which are more friendly to Java users.
*
@@ -39,7 +39,7 @@ import org.apache.spark.ml.util.Identifiable
* See [[ParamValidators]] for factory methods for common validation functions.
* @tparam T param value type
*/
-@AlphaComponent
+@DeveloperApi
class Param[T](val parent: String, val name: String, val doc: String, val isValid: T => Boolean)
extends Serializable {
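
A sketch of declaring a validated param inside a Params component (the `HasShrinkage` trait, its bounds, and its default are invented for illustration):

  import org.apache.spark.ml.param.{DoubleParam, Params, ParamValidators}

  // Hypothetical mixin declaring a defaulted param validated at set time.
  trait HasShrinkage extends Params {
    // isValid rejects values outside (0, 1].
    val shrinkage: DoubleParam = new DoubleParam(this, "shrinkage",
      "learning rate in (0, 1]", ParamValidators.inRange(0, 1, false, true))
    setDefault(shrinkage -> 0.1)
    def getShrinkage: Double = $(shrinkage)
  }
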
@@ -174,7 +174,11 @@ object ParamValidators {
// specialize primitive-typed params because Java doesn't recognize scala.Double, scala.Int, ...
-/** Specialized version of [[Param[Double]]] for Java. */
+/**
+ * :: DeveloperApi ::
+ * Specialized version of [[Param[Double]]] for Java.
+ */
+@DeveloperApi
class DoubleParam(parent: String, name: String, doc: String, isValid: Double => Boolean)
extends Param[Double](parent, name, doc, isValid) {
@@ -189,7 +193,11 @@ class DoubleParam(parent: String, name: String, doc: String, isValid: Double =>
override def w(value: Double): ParamPair[Double] = super.w(value)
}
-/** Specialized version of [[Param[Int]]] for Java. */
+/**
+ * :: DeveloperApi ::
+ * Specialized version of [[Param[Int]]] for Java.
+ */
+@DeveloperApi
class IntParam(parent: String, name: String, doc: String, isValid: Int => Boolean)
extends Param[Int](parent, name, doc, isValid) {
@@ -204,7 +212,11 @@ class IntParam(parent: String, name: String, doc: String, isValid: Int => Boolea
override def w(value: Int): ParamPair[Int] = super.w(value)
}
-/** Specialized version of [[Param[Float]]] for Java. */
+/**
+ * :: DeveloperApi ::
+ * Specialized version of [[Param[Float]]] for Java.
+ */
+@DeveloperApi
class FloatParam(parent: String, name: String, doc: String, isValid: Float => Boolean)
extends Param[Float](parent, name, doc, isValid) {
@@ -219,7 +231,11 @@ class FloatParam(parent: String, name: String, doc: String, isValid: Float => Bo
override def w(value: Float): ParamPair[Float] = super.w(value)
}
-/** Specialized version of [[Param[Long]]] for Java. */
+/**
+ * :: DeveloperApi ::
+ * Specialized version of [[Param[Long]]] for Java.
+ */
+@DeveloperApi
class LongParam(parent: String, name: String, doc: String, isValid: Long => Boolean)
extends Param[Long](parent, name, doc, isValid) {
@@ -234,7 +250,11 @@ class LongParam(parent: String, name: String, doc: String, isValid: Long => Bool
override def w(value: Long): ParamPair[Long] = super.w(value)
}
-/** Specialized version of [[Param[Boolean]]] for Java. */
+/**
+ * :: DeveloperApi ::
+ * Specialized version of [[Param[Boolean]]] for Java.
+ */
+@DeveloperApi
class BooleanParam(parent: String, name: String, doc: String) // No need for isValid
extends Param[Boolean](parent, name, doc) {
@@ -243,7 +263,11 @@ class BooleanParam(parent: String, name: String, doc: String) // No need for isV
override def w(value: Boolean): ParamPair[Boolean] = super.w(value)
}
-/** Specialized version of [[Param[Array[String]]]] for Java. */
+/**
+ * :: DeveloperApi ::
+ * Specialized version of [[Param[Array[String]]]] for Java.
+ */
+@DeveloperApi
class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array[String] => Boolean)
extends Param[Array[String]](parent, name, doc, isValid) {
@@ -256,7 +280,11 @@ class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array
def w(value: java.util.List[String]): ParamPair[Array[String]] = w(value.asScala.toArray)
}
-/** Specialized version of [[Param[Array[Double]]]] for Java. */
+/**
+ * :: DeveloperApi ::
+ * Specialized version of [[Param[Array[Double]]]] for Java.
+ */
+@DeveloperApi
class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array[Double] => Boolean)
extends Param[Array[Double]](parent, name, doc, isValid) {
@@ -270,8 +298,10 @@ class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array
}
/**
+ * :: Experimental ::
 * A param and its value.
*/
+@Experimental
case class ParamPair[T](param: Param[T], value: T) {
// This is *the* place Param.validate is called. Whenever a parameter is specified, we should
// always construct a ParamPair so that validate is called.
@@ -279,11 +309,11 @@ case class ParamPair[T](param: Param[T], value: T) {
}
/**
- * :: AlphaComponent ::
+ * :: DeveloperApi ::
* Trait for components that take parameters. This also provides an internal param map to store
* parameter values attached to the instance.
*/
-@AlphaComponent
+@DeveloperApi
trait Params extends Identifiable with Serializable {
/**
@@ -541,10 +571,10 @@ trait Params extends Identifiable with Serializable {
abstract class JavaParams extends Params
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* A param to value map.
*/
-@AlphaComponent
+@Experimental
final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any])
extends Serializable {
@@ -665,6 +695,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any])
def size: Int = map.size
}
+@Experimental
object ParamMap {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 2a5ddbfae5..900b637ff8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -31,7 +31,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
import org.netlib.util.intW
import org.apache.spark.{Logging, Partitioner}
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
@@ -169,8 +169,10 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR
}
/**
+ * :: Experimental ::
* Model fitted by ALS.
*/
+@Experimental
class ALSModel private[ml] (
override val uid: String,
k: Int,
@@ -208,6 +210,7 @@ class ALSModel private[ml] (
/**
+ * :: Experimental ::
* Alternating Least Squares (ALS) matrix factorization.
*
* ALS attempts to estimate the ratings matrix `R` as the product of two lower-rank matrices,
@@ -236,6 +239,7 @@ class ALSModel private[ml] (
* indicated user
* preferences rather than explicit ratings given to items.
*/
+@Experimental
class ALS(override val uid: String) extends Estimator[ALSModel] with ALSParams {
import org.apache.spark.ml.recommendation.ALS.Rating
@@ -326,7 +330,11 @@ class ALS(override val uid: String) extends Estimator[ALSModel] with ALSParams {
@DeveloperApi
object ALS extends Logging {
- /** Rating class for better code readability. */
+ /**
+ * :: DeveloperApi ::
+ * Rating class for better code readability.
+ */
+ @DeveloperApi
case class Rating[@specialized(Int, Long) ID](user: ID, item: ID, rating: Float)
/** Trait for least squares solvers applied to the normal equation. */
@@ -487,8 +495,10 @@ object ALS extends Logging {
}
/**
+ * :: DeveloperApi ::
* Implementation of the ALS algorithm.
*/
+ @DeveloperApi
def train[ID: ClassTag]( // scalastyle:ignore
ratings: RDD[Rating[ID]],
rank: Int = 10,
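As a usage sketch of the now-Experimental ALS estimator (the `ratings` and `testData` DataFrames and their column names are assumed for illustration):

    import org.apache.spark.ml.recommendation.ALS

    // `ratings` is an assumed DataFrame with "user", "item", "rating" columns.
    val als = new ALS()
      .setRank(10)
      .setMaxIter(10)
      .setRegParam(0.1)
      .setUserCol("user")
      .setItemCol("item")
      .setRatingCol("rating")
    val model = als.fit(ratings)            // ALSModel, Experimental as of this patch
    val scored = model.transform(testData)  // appends a "prediction" column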
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index e67df21b2e..43b68e7bb2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -17,10 +17,10 @@
package org.apache.spark.ml.regression
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
-import org.apache.spark.ml.tree.{TreeRegressorParams, DecisionTreeParams, DecisionTreeModel, Node}
+import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
import org.apache.spark.ml.util.{Identifiable, MetadataUtils}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -31,13 +31,12 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm
* for regression.
* It supports both continuous and categorical features.
*/
-@AlphaComponent
+@Experimental
final class DecisionTreeRegressor(override val uid: String)
extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
with DecisionTreeParams with TreeRegressorParams {
@@ -79,19 +78,19 @@ final class DecisionTreeRegressor(override val uid: String)
}
}
+@Experimental
object DecisionTreeRegressor {
/** Accessor for supported impurities: variance */
final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for regression.
* It supports both continuous and categorical features.
* @param rootNode Root of the decision tree
*/
-@AlphaComponent
+@Experimental
final class DecisionTreeRegressionModel private[ml] (
override val uid: String,
override val rootNode: Node)
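A short sketch of the Experimental decision tree regressor (a `training` DataFrame with "features"/"label" columns is assumed):

    import org.apache.spark.ml.regression.DecisionTreeRegressor

    val dt = new DecisionTreeRegressor()
      .setMaxDepth(5)
      .setImpurity("variance")  // the only impurity listed in supportedImpurities
    val model = dt.fit(training)
    // The fitted tree is exposed through the DeveloperApi Node hierarchy below.
    val root = model.rootNode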
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 4249ff5c1e..69f4f5414c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -20,10 +20,10 @@ package org.apache.spark.ml.regression
import com.github.fommil.netlib.BLAS.{getInstance => blas}
import org.apache.spark.Logging
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.{Param, ParamMap}
-import org.apache.spark.ml.tree.{GBTParams, TreeRegressorParams, DecisionTreeModel, TreeEnsembleModel}
+import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
import org.apache.spark.ml.util.{Identifiable, MetadataUtils}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -35,13 +35,12 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* learning algorithm for regression.
* It supports both continuous and categorical features.
*/
-@AlphaComponent
+@Experimental
final class GBTRegressor(override val uid: String)
extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
with GBTParams with TreeRegressorParams with Logging {
@@ -134,6 +133,7 @@ final class GBTRegressor(override val uid: String)
}
}
+@Experimental
object GBTRegressor {
// The losses below should be lowercase.
/** Accessor for supported loss settings: squared (L2), absolute (L1) */
@@ -141,7 +141,7 @@ object GBTRegressor {
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
*
* [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
* model for regression.
@@ -149,7 +149,7 @@ object GBTRegressor {
* @param _trees Decision trees in the ensemble.
* @param _treeWeights Weights for the decision trees in the ensemble.
*/
-@AlphaComponent
+@Experimental
final class GBTRegressionModel(
override val uid: String,
private val _trees: Array[DecisionTreeRegressionModel],
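A corresponding sketch for the Experimental GBT regressor (same assumed `training` DataFrame):

    import org.apache.spark.ml.regression.GBTRegressor

    val gbt = new GBTRegressor()
      .setMaxIter(20)          // number of boosting iterations, i.e. trees
      .setMaxDepth(4)
      .setLossType("squared")  // "squared" (L2) or "absolute" (L1)
    val model = gbt.fit(training)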
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 3ebb78f792..7c40db1a40 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV, norm => brzNorm}
import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN}
import org.apache.spark.Logging
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared.{HasElasticNetParam, HasMaxIter, HasRegParam, HasTol}
@@ -44,8 +44,7 @@ private[regression] trait LinearRegressionParams extends PredictorParams
with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Linear regression.
*
* The learning objective is to minimize the squared error, with regularization.
@@ -58,7 +57,7 @@ private[regression] trait LinearRegressionParams extends PredictorParams
* - L1 (Lasso)
* - L2 + L1 (elastic net)
*/
-@AlphaComponent
+@Experimental
class LinearRegression(override val uid: String)
extends Regressor[Vector, LinearRegression, LinearRegressionModel]
with LinearRegressionParams with Logging {
@@ -190,11 +189,10 @@ class LinearRegression(override val uid: String)
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* Model produced by [[LinearRegression]].
*/
-@AlphaComponent
+@Experimental
class LinearRegressionModel private[ml] (
override val uid: String,
val weights: Vector,
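The regularization modes listed in the doc above map onto two params; a sketch (assumed `training` DataFrame):

    import org.apache.spark.ml.regression.LinearRegression

    // regParam = 0 disables regularization; elasticNetParam = 0 is pure L2
    // (ridge), 1 is pure L1 (lasso), and values in between blend the two.
    val lr = new LinearRegression()
      .setMaxIter(100)
      .setRegParam(0.3)
      .setElasticNetParam(0.5)
    val model = lr.fit(training)
    println(s"weights=${model.weights} intercept=${model.intercept}")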
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index 82437aa8de..ae767a1732 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -17,10 +17,10 @@
package org.apache.spark.ml.regression
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
-import org.apache.spark.ml.tree.{RandomForestParams, TreeRegressorParams, DecisionTreeModel, TreeEnsembleModel}
+import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeEnsembleModel, TreeRegressorParams}
import org.apache.spark.ml.util.{Identifiable, MetadataUtils}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.regression.LabeledPoint
@@ -31,12 +31,11 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
* It supports both continuous and categorical features.
*/
-@AlphaComponent
+@Experimental
final class RandomForestRegressor(override val uid: String)
extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
with RandomForestParams with TreeRegressorParams {
@@ -89,6 +88,7 @@ final class RandomForestRegressor(override val uid: String)
}
}
+@Experimental
object RandomForestRegressor {
/** Accessor for supported impurity settings: variance */
final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities
@@ -99,13 +99,12 @@ object RandomForestRegressor {
}
/**
- * :: AlphaComponent ::
- *
+ * :: Experimental ::
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for regression.
* It supports both continuous and categorical features.
* @param _trees Decision trees in the ensemble.
*/
-@AlphaComponent
+@Experimental
final class RandomForestRegressionModel private[ml] (
override val uid: String,
private val _trees: Array[DecisionTreeRegressionModel])
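And for the Experimental random forest regressor (assumed `training` DataFrame):

    import org.apache.spark.ml.regression.RandomForestRegressor

    val rf = new RandomForestRegressor()
      .setNumTrees(30)
      .setMaxDepth(5)
      .setFeatureSubsetStrategy("auto")
    val model = rf.fit(training)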
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
index d2dec0c76c..6a84176efb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
@@ -17,14 +17,16 @@
package org.apache.spark.ml.tree
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.tree.model.{InformationGainStats => OldInformationGainStats,
Node => OldNode, Predict => OldPredict}
-
/**
+ * :: DeveloperApi ::
* Decision tree node interface.
*/
+@DeveloperApi
sealed abstract class Node extends Serializable {
// TODO: Add aggregate stats (once available). This will happen after we move the DecisionTree
@@ -89,10 +91,12 @@ private[ml] object Node {
}
/**
+ * :: DeveloperApi ::
* Decision tree leaf node.
* @param prediction Prediction this node makes
* @param impurity Impurity measure at this node (for training data)
*/
+@DeveloperApi
final class LeafNode private[ml] (
override val prediction: Double,
override val impurity: Double) extends Node {
@@ -118,6 +122,7 @@ final class LeafNode private[ml] (
}
/**
+ * :: DeveloperApi ::
* Internal Decision Tree node.
* @param prediction Prediction this node would make if it were a leaf node
* @param impurity Impurity measure at this node (for training data)
@@ -127,6 +132,7 @@ final class LeafNode private[ml] (
* @param rightChild Right-hand child node
* @param split Information about the test used to split to the left or right child.
*/
+@DeveloperApi
final class InternalNode private[ml] (
override val prediction: Double,
override val impurity: Double,
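Because Node is sealed with final LeafNode/InternalNode subclasses, callers can pattern match to walk a fitted tree; a minimal depth computation as a sketch:

    import org.apache.spark.ml.tree.{InternalNode, LeafNode, Node}

    // Depth of the subtree rooted at `node`; a lone leaf has depth 0.
    def depth(node: Node): Int = node match {
      case _: LeafNode     => 0
      case n: InternalNode => 1 + math.max(depth(n.leftChild), depth(n.rightChild))
    }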
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala
index 90f1d05276..7acdeeee72 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala
@@ -17,15 +17,18 @@
package org.apache.spark.ml.tree
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.tree.configuration.{FeatureType => OldFeatureType}
import org.apache.spark.mllib.tree.model.{Split => OldSplit}
/**
+ * :: DeveloperApi ::
* Interface for a "Split," which specifies a test made at a decision tree node
* to choose the left or right path.
*/
+@DeveloperApi
sealed trait Split extends Serializable {
/** Index of feature which this split tests */
@@ -52,12 +55,14 @@ private[tree] object Split {
}
/**
+ * :: DeveloperApi ::
* Split which tests a categorical feature.
* @param featureIndex Index of the feature to test
* @param _leftCategories If the feature value is in this set of categories, then the split goes
* left. Otherwise, it goes right.
* @param numCategories Number of categories for this feature.
*/
+@DeveloperApi
final class CategoricalSplit private[ml] (
override val featureIndex: Int,
_leftCategories: Array[Double],
@@ -125,11 +130,13 @@ final class CategoricalSplit private[ml] (
}
/**
+ * :: DeveloperApi ::
* Split which tests a continuous feature.
* @param featureIndex Index of the feature to test
* @param threshold If the feature value is <= this threshold, then the split goes left.
* Otherwise, it goes right.
*/
+@DeveloperApi
final class ContinuousSplit private[ml] (override val featureIndex: Int, val threshold: Double)
extends Split {
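The routing rule documented on the two split types can be restated in user code; the helper below is an illustrative sketch, not an internal API, and it assumes `leftCategories` as the public accessor for `_leftCategories`:

    import org.apache.spark.ml.tree.{CategoricalSplit, ContinuousSplit, Split}
    import org.apache.spark.mllib.linalg.Vector

    // True if `features` would be sent to the left child by `split`.
    def goesLeft(split: Split, features: Vector): Boolean = split match {
      case s: ContinuousSplit  => features(s.featureIndex) <= s.threshold
      case s: CategoricalSplit => s.leftCategories.contains(features(s.featureIndex))
    }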
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 816fcedf2e..a0c5238d96 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -17,7 +17,6 @@
package org.apache.spark.ml.tree
-import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.{HasMaxIter, HasSeed}
@@ -26,12 +25,10 @@ import org.apache.spark.mllib.tree.impurity.{Entropy => OldEntropy, Gini => OldG
import org.apache.spark.mllib.tree.loss.{Loss => OldLoss}
/**
- * :: DeveloperApi ::
* Parameters for Decision Tree-based algorithms.
*
* Note: Marked as private and DeveloperApi since this may be made public in the future.
*/
-@DeveloperApi
private[ml] trait DecisionTreeParams extends PredictorParams {
/**
@@ -265,12 +262,10 @@ private[ml] object TreeRegressorParams {
}
/**
- * :: DeveloperApi ::
* Parameters for Decision Tree-based ensemble algorithms.
*
* Note: Marked as private and DeveloperApi since this may be made public in the future.
*/
-@DeveloperApi
private[ml] trait TreeEnsembleParams extends DecisionTreeParams with HasSeed {
/**
@@ -307,12 +302,10 @@ private[ml] trait TreeEnsembleParams extends DecisionTreeParams with HasSeed {
}
/**
- * :: DeveloperApi ::
* Parameters for Random Forest algorithms.
*
* Note: Marked as private and DeveloperApi since this may be made public in the future.
*/
-@DeveloperApi
private[ml] trait RandomForestParams extends TreeEnsembleParams {
/**
@@ -377,12 +370,10 @@ private[ml] object RandomForestParams {
}
/**
- * :: DeveloperApi ::
* Parameters for Gradient-Boosted Tree algorithms.
*
* Note: Marked as private and DeveloperApi since this may be made public in the future.
*/
-@DeveloperApi
private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index e21ff94a20..2e5a629561 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.tuning
import com.github.fommil.netlib.F2jBLAS
import org.apache.spark.Logging
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml._
import org.apache.spark.ml.evaluation.Evaluator
import org.apache.spark.ml.param._
@@ -79,10 +79,10 @@ private[ml] trait CrossValidatorParams extends Params {
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* K-fold cross validation.
*/
-@AlphaComponent
+@Experimental
class CrossValidator(override val uid: String) extends Estimator[CrossValidatorModel]
with CrossValidatorParams with Logging {
@@ -150,10 +150,10 @@ class CrossValidator(override val uid: String) extends Estimator[CrossValidatorM
}
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Model from k-fold cross validation.
*/
-@AlphaComponent
+@Experimental
class CrossValidatorModel private[ml] (
override val uid: String,
val bestModel: Model[_])
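A sketch tying the Experimental tuning classes together (assumed `training` DataFrame; the grid values are illustrative):

    import org.apache.spark.ml.evaluation.RegressionEvaluator
    import org.apache.spark.ml.regression.LinearRegression
    import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}

    val lr = new LinearRegression()
    val grid = new ParamGridBuilder()
      .addGrid(lr.regParam, Array(0.01, 0.1))
      .build()
    val cv = new CrossValidator()
      .setEstimator(lr)
      .setEvaluator(new RegressionEvaluator())
      .setEstimatorParamMaps(grid)
      .setNumFolds(3)
    val cvModel = cv.fit(training)  // cvModel.bestModel holds the winning Model[_]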
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
index dafe73d82c..98a8f0330c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
@@ -20,14 +20,14 @@ package org.apache.spark.ml.tuning
import scala.annotation.varargs
import scala.collection.mutable
-import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.annotation.Experimental
import org.apache.spark.ml.param._
/**
- * :: AlphaComponent ::
+ * :: Experimental ::
* Builder for a param grid used in grid search-based model selection.
*/
-@AlphaComponent
+@Experimental
class ParamGridBuilder {
private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]
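For completeness, a sketch of the builder on its own; addGrid calls combine as a cross product (the no-values BooleanParam overload expanding to true/false is an assumption of this sketch):

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.param.ParamMap
    import org.apache.spark.ml.tuning.ParamGridBuilder

    val lr = new LogisticRegression()
    // 2 regParam values x 2 fitIntercept values = 4 ParamMaps.
    val grid: Array[ParamMap] = new ParamGridBuilder()
      .addGrid(lr.regParam, Array(0.01, 0.1))
      .addGrid(lr.fitIntercept)  // assumed to expand a BooleanParam to (true, false)
      .build()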