author     Nick Pentreath <nickp@za.ibm.com>    2016-06-21 00:39:47 -0700
committer  Xiangrui Meng <meng@databricks.com>  2016-06-21 00:39:47 -0700
commit     37494a18e8d6e22113338523d6498e00ac9725ea (patch)
tree       0d7eb9a0bed1046a8825ff504f3ea922c2a2d743
parent     ce49bfc2550ba8f5a33235c7fc3b88201d63c276 (diff)
[SPARK-10258][DOC][ML] Add @Since annotations to ml.feature
This PR adds missing `Since` annotations to the `ml.feature` package.

Closes #8505.

## How was this patch tested?

Existing tests.

Author: Nick Pentreath <nickp@za.ibm.com>

Closes #13641 from MLnick/add-since-annotations.
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala | 11
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 12
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala | 19
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala | 24
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala | 14
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 20
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala | 26
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 23
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala | 7
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 23
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala | 10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala | 18
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 24
-rwxr-xr-x  mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 14
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 33
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 22
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 24
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala | 14
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 29
-rwxr-xr-x  python/pyspark/ml/feature.py | 10
28 files changed, 362 insertions, 68 deletions
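
For reference, the annotation pattern applied throughout the package is easiest to see in one place. Below is a condensed, illustrative sketch of `Binarizer` as it looks after this patch, reconstructed from the hunks that follow; imports and method bodies are elided, `???` stands in for the unchanged implementations, and the `transform` signature is taken from the Spark 2.0 API rather than from these hunks.

```scala
package org.apache.spark.ml.feature

// Condensed sketch of Binarizer after this patch (bodies elided).
// Every public class, constructor, Param, getter/setter and override records
// the release it first appeared in; copy() carries 1.4.1 and transform()
// carries 2.0.0, while the rest of the class dates to 1.4.0.
@Experimental
@Since("1.4.0")
final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
  extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {

  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("binarizer"))

  /** Threshold used to binarize continuous features. Default: 0.0. @group param */
  @Since("1.4.0")
  val threshold: DoubleParam =
    new DoubleParam(this, "threshold", "threshold used to binarize continuous features")

  /** @group getParam */
  @Since("1.4.0")
  def getThreshold: Double = $(threshold)

  /** @group setParam */
  @Since("1.4.0")
  def setThreshold(value: Double): this.type = set(threshold, value)

  setDefault(threshold -> 0.0)

  /** @group setParam */
  @Since("1.4.0")
  def setInputCol(value: String): this.type = set(inputCol, value)

  /** @group setParam */
  @Since("1.4.0")
  def setOutputCol(value: String): this.type = set(outputCol, value)

  @Since("2.0.0")
  override def transform(dataset: Dataset[_]): DataFrame = ???  // unchanged by this patch

  @Since("1.4.0")
  override def transformSchema(schema: StructType): StructType = ???  // unchanged by this patch

  @Since("1.4.1")
  override def copy(extra: ParamMap): Binarizer = defaultCopy(extra)
}
```

The same convention runs through the rest of the diff: members introduced later than their class (for example `setBinary` on `CountVectorizer` and `HashingTF`) are tagged `2.0.0`, and a few existing annotations (for example on `MaxAbsScaler`'s companion object and `IDFModel.idf`) are corrected to the actual release.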
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
index 318c8b8b2f..fa9634fdfa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types._
* Binarize a column of continuous features given a threshold.
*/
@Experimental
-final class Binarizer(override val uid: String)
+@Since("1.4.0")
+final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("binarizer"))
/**
@@ -47,21 +49,26 @@ final class Binarizer(override val uid: String)
* Default: 0.0
* @group param
*/
+ @Since("1.4.0")
val threshold: DoubleParam =
new DoubleParam(this, "threshold", "threshold used to binarize continuous features")
/** @group getParam */
+ @Since("1.4.0")
def getThreshold: Double = $(threshold)
/** @group setParam */
+ @Since("1.4.0")
def setThreshold(value: Double): this.type = set(threshold, value)
setDefault(threshold -> 0.0)
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -96,6 +103,7 @@ final class Binarizer(override val uid: String)
}
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
val inputType = schema($(inputCol)).dataType
val outputColName = $(outputCol)
@@ -115,6 +123,7 @@ final class Binarizer(override val uid: String)
StructType(schema.fields :+ outCol)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): Binarizer = defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index ff988cc815..caffc39e2b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
* `Bucketizer` maps a column of continuous features to a column of feature buckets.
*/
@Experimental
-final class Bucketizer(override val uid: String)
+@Since("1.4.0")
+final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("bucketizer"))
/**
@@ -48,6 +50,7 @@ final class Bucketizer(override val uid: String)
* otherwise, values outside the splits specified will be treated as errors.
* @group param
*/
+ @Since("1.4.0")
val splits: DoubleArrayParam = new DoubleArrayParam(this, "splits",
"Split points for mapping continuous features into buckets. With n+1 splits, there are n " +
"buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last " +
@@ -57,15 +60,19 @@ final class Bucketizer(override val uid: String)
Bucketizer.checkSplits)
/** @group getParam */
+ @Since("1.4.0")
def getSplits: Array[Double] = $(splits)
/** @group setParam */
+ @Since("1.4.0")
def setSplits(value: Array[Double]): this.type = set(splits, value)
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -86,16 +93,19 @@ final class Bucketizer(override val uid: String)
attr.toStructField()
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType)
SchemaUtils.appendColumn(schema, prepOutputField(schema))
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): Bucketizer = {
defaultCopy[Bucketizer](extra).setParent(parent)
}
}
+@Since("1.6.0")
object Bucketizer extends DefaultParamsReadable[Bucketizer] {
/** We require splits to be of length >= 3 and to be in strictly increasing order. */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala
index e73a8f5d66..1c329267d7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala
@@ -62,21 +62,27 @@ private[feature] trait ChiSqSelectorParams extends Params
* categorical label.
*/
@Experimental
-final class ChiSqSelector(override val uid: String)
+@Since("1.6.0")
+final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String)
extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable {
+ @Since("1.6.0")
def this() = this(Identifiable.randomUID("chiSqSelector"))
/** @group setParam */
+ @Since("1.6.0")
def setNumTopFeatures(value: Int): this.type = set(numTopFeatures, value)
/** @group setParam */
+ @Since("1.6.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
/** @group setParam */
+ @Since("1.6.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.6.0")
def setLabelCol(value: String): this.type = set(labelCol, value)
@Since("2.0.0")
@@ -91,12 +97,14 @@ final class ChiSqSelector(override val uid: String)
copyValues(new ChiSqSelectorModel(uid, chiSqSelector).setParent(this))
}
+ @Since("1.6.0")
override def transformSchema(schema: StructType): StructType = {
SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT)
SchemaUtils.checkNumericType(schema, $(labelCol))
SchemaUtils.appendColumn(schema, $(outputCol), new VectorUDT)
}
+ @Since("1.6.0")
override def copy(extra: ParamMap): ChiSqSelector = defaultCopy(extra)
}
@@ -112,23 +120,28 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] {
* Model fitted by [[ChiSqSelector]].
*/
@Experimental
+@Since("1.6.0")
final class ChiSqSelectorModel private[ml] (
- override val uid: String,
+ @Since("1.6.0") override val uid: String,
private val chiSqSelector: feature.ChiSqSelectorModel)
extends Model[ChiSqSelectorModel] with ChiSqSelectorParams with MLWritable {
import ChiSqSelectorModel._
/** list of indices to select (filter). Must be ordered asc */
+ @Since("1.6.0")
val selectedFeatures: Array[Int] = chiSqSelector.selectedFeatures
/** @group setParam */
+ @Since("1.6.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
/** @group setParam */
+ @Since("1.6.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.6.0")
def setLabelCol(value: String): this.type = set(labelCol, value)
@Since("2.0.0")
@@ -143,6 +156,7 @@ final class ChiSqSelectorModel private[ml] (
dataset.withColumn($(outputCol), selector(col($(featuresCol))), newField.metadata)
}
+ @Since("1.6.0")
override def transformSchema(schema: StructType): StructType = {
SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT)
val newField = prepOutputField(schema)
@@ -165,6 +179,7 @@ final class ChiSqSelectorModel private[ml] (
newAttributeGroup.toStructField()
}
+ @Since("1.6.0")
override def copy(extra: ParamMap): ChiSqSelectorModel = {
val copied = new ChiSqSelectorModel(uid, chiSqSelector)
copyValues(copied, extra).setParent(parent)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
index 272567d09c..3250fe5598 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
@@ -120,27 +120,35 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit
* Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]].
*/
@Experimental
-class CountVectorizer(override val uid: String)
+@Since("1.5.0")
+class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("cntVec"))
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setVocabSize(value: Int): this.type = set(vocabSize, value)
/** @group setParam */
+ @Since("1.5.0")
def setMinDF(value: Double): this.type = set(minDF, value)
/** @group setParam */
+ @Since("1.5.0")
def setMinTF(value: Double): this.type = set(minTF, value)
/** @group setParam */
+ @Since("2.0.0")
def setBinary(value: Boolean): this.type = set(binary, value)
@Since("2.0.0")
@@ -176,10 +184,12 @@ class CountVectorizer(override val uid: String)
copyValues(new CountVectorizerModel(uid, vocab).setParent(this))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): CountVectorizer = defaultCopy(extra)
}
@@ -196,26 +206,34 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] {
* @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted.
*/
@Experimental
-class CountVectorizerModel(override val uid: String, val vocabulary: Array[String])
+@Since("1.5.0")
+class CountVectorizerModel(
+ @Since("1.5.0") override val uid: String,
+ @Since("1.5.0") val vocabulary: Array[String])
extends Model[CountVectorizerModel] with CountVectorizerParams with MLWritable {
import CountVectorizerModel._
+ @Since("1.5.0")
def this(vocabulary: Array[String]) = {
this(Identifiable.randomUID("cntVecModel"), vocabulary)
set(vocabSize, vocabulary.length)
}
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setMinTF(value: Double): this.type = set(minTF, value)
/** @group setParam */
+ @Since("2.0.0")
def setBinary(value: Boolean): this.type = set(binary, value)
/** Dictionary created from [[vocabulary]] and its indices, broadcast once for [[transform()]] */
@@ -252,10 +270,12 @@ class CountVectorizerModel(override val uid: String, val vocabulary: Array[Strin
dataset.withColumn($(outputCol), vectorizer(col($(inputCol))))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): CountVectorizerModel = {
val copied = new CountVectorizerModel(uid, vocabulary).setParent(parent)
copyValues(copied, extra)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
index 301358ef12..9605145e12 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
@@ -36,9 +36,11 @@ import org.apache.spark.sql.types.DataType
* More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]].
*/
@Experimental
-class DCT(override val uid: String)
+@Since("1.5.0")
+class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("dct"))
/**
@@ -46,13 +48,16 @@ class DCT(override val uid: String)
* Default: false
* @group param
*/
+ @Since("1.5.0")
def inverse: BooleanParam = new BooleanParam(
this, "inverse", "Set transformer to perform inverse DCT")
/** @group setParam */
+ @Since("1.5.0")
def setInverse(value: Boolean): this.type = set(inverse, value)
/** @group getParam */
+ @Since("1.5.0")
def getInverse: Boolean = $(inverse)
setDefault(inverse -> false)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
index 9d2e60fa3f..92fefb1e6c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
@@ -33,21 +33,26 @@ import org.apache.spark.sql.types.DataType
* multiplier.
*/
@Experimental
-class ElementwiseProduct(override val uid: String)
+@Since("2.0.0")
+class ElementwiseProduct @Since("2.0.0") (@Since("2.0.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable {
+ @Since("2.0.0")
def this() = this(Identifiable.randomUID("elemProd"))
/**
* the vector to multiply with input vectors
* @group param
*/
+ @Since("2.0.0")
val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product")
/** @group setParam */
+ @Since("2.0.0")
def setScalingVec(value: Vector): this.type = set(scalingVec, value)
/** @group getParam */
+ @Since("2.0.0")
def getScalingVec: Vector = getOrDefault(scalingVec)
override protected def createTransformFunc: Vector => Vector = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
index 94e1825ba6..6ca7336cd0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
@@ -38,15 +38,19 @@ import org.apache.spark.sql.types.{ArrayType, StructType}
* otherwise the features will not be mapped evenly to the columns.
*/
@Experimental
-class HashingTF(override val uid: String)
+@Since("1.2.0")
+class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
+ @Since("1.2.0")
def this() = this(Identifiable.randomUID("hashingTF"))
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/**
@@ -54,6 +58,7 @@ class HashingTF(override val uid: String)
* (default = 2^18^)
* @group param
*/
+ @Since("1.2.0")
val numFeatures = new IntParam(this, "numFeatures", "number of features (> 0)",
ParamValidators.gt(0))
@@ -64,6 +69,7 @@ class HashingTF(override val uid: String)
* (default = false)
* @group param
*/
+ @Since("2.0.0")
val binary = new BooleanParam(this, "binary", "If true, all non zero counts are set to 1. " +
"This is useful for discrete probabilistic models that model binary events rather " +
"than integer counts")
@@ -71,15 +77,19 @@ class HashingTF(override val uid: String)
setDefault(numFeatures -> (1 << 18), binary -> false)
/** @group getParam */
+ @Since("1.2.0")
def getNumFeatures: Int = $(numFeatures)
/** @group setParam */
+ @Since("1.2.0")
def setNumFeatures(value: Int): this.type = set(numFeatures, value)
/** @group getParam */
+ @Since("2.0.0")
def getBinary: Boolean = $(binary)
/** @group setParam */
+ @Since("2.0.0")
def setBinary(value: Boolean): this.type = set(binary, value)
@Since("2.0.0")
@@ -92,6 +102,7 @@ class HashingTF(override val uid: String)
dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
val inputType = schema($(inputCol)).dataType
require(inputType.isInstanceOf[ArrayType],
@@ -100,6 +111,7 @@ class HashingTF(override val uid: String)
SchemaUtils.appendColumn(schema, attrGroup.toStructField())
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): HashingTF = defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 08beda6d75..cf03a2845c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -64,18 +64,23 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
* Compute the Inverse Document Frequency (IDF) given a collection of documents.
*/
@Experimental
-final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase
- with DefaultParamsWritable {
+@Since("1.4.0")
+final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String)
+ extends Estimator[IDFModel] with IDFBase with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("idf"))
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setMinDocFreq(value: Int): this.type = set(minDocFreq, value)
@Since("2.0.0")
@@ -88,10 +93,12 @@ final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBa
copyValues(new IDFModel(uid, idf).setParent(this))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): IDF = defaultCopy(extra)
}
@@ -107,17 +114,20 @@ object IDF extends DefaultParamsReadable[IDF] {
* Model fitted by [[IDF]].
*/
@Experimental
+@Since("1.4.0")
class IDFModel private[ml] (
- override val uid: String,
+ @Since("1.4.0") override val uid: String,
idfModel: feature.IDFModel)
extends Model[IDFModel] with IDFBase with MLWritable {
import IDFModel._
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -128,17 +138,19 @@ class IDFModel private[ml] (
dataset.withColumn($(outputCol), idf(col($(inputCol))))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): IDFModel = {
val copied = new IDFModel(uid, idfModel)
copyValues(copied, extra).setParent(parent)
}
/** Returns the IDF vector. */
- @Since("1.6.0")
+ @Since("2.0.0")
def idf: Vector = idfModel.idf.asML
@Since("1.6.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
index fa65ff9879..dca28b5c5d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
@@ -42,9 +42,9 @@ import org.apache.spark.sql.types._
* `Vector(6, 8)` if all input features were numeric. If the first feature was instead nominal
* with four categories, the output would then be `Vector(0, 0, 0, 0, 3, 4, 0, 0)`.
*/
-@Since("1.6.0")
@Experimental
-class Interaction @Since("1.6.0") (override val uid: String) extends Transformer
+@Since("1.6.0")
+class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer
with HasInputCols with HasOutputCol with DefaultParamsWritable {
@Since("1.6.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala
index 7298a18ff8..31a5815267 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala
@@ -54,16 +54,19 @@ private[feature] trait MaxAbsScalerParams extends Params with HasInputCol with H
* any sparsity.
*/
@Experimental
-class MaxAbsScaler @Since("2.0.0") (override val uid: String)
+@Since("2.0.0")
+class MaxAbsScaler @Since("2.0.0") (@Since("2.0.0") override val uid: String)
extends Estimator[MaxAbsScalerModel] with MaxAbsScalerParams with DefaultParamsWritable {
@Since("2.0.0")
def this() = this(Identifiable.randomUID("maxAbsScal"))
/** @group setParam */
+ @Since("2.0.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("2.0.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -81,17 +84,19 @@ class MaxAbsScaler @Since("2.0.0") (override val uid: String)
copyValues(new MaxAbsScalerModel(uid, Vectors.dense(maxAbs)).setParent(this))
}
+ @Since("2.0.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("2.0.0")
override def copy(extra: ParamMap): MaxAbsScaler = defaultCopy(extra)
}
-@Since("1.6.0")
+@Since("2.0.0")
object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] {
- @Since("1.6.0")
+ @Since("2.0.0")
override def load(path: String): MaxAbsScaler = super.load(path)
}
@@ -101,17 +106,20 @@ object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] {
*
*/
@Experimental
+@Since("2.0.0")
class MaxAbsScalerModel private[ml] (
- override val uid: String,
- val maxAbs: Vector)
+ @Since("2.0.0") override val uid: String,
+ @Since("2.0.0") val maxAbs: Vector)
extends Model[MaxAbsScalerModel] with MaxAbsScalerParams with MLWritable {
import MaxAbsScalerModel._
/** @group setParam */
+ @Since("2.0.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("2.0.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -126,10 +134,12 @@ class MaxAbsScalerModel private[ml] (
dataset.withColumn($(outputCol), reScale(col($(inputCol))))
}
+ @Since("2.0.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("2.0.0")
override def copy(extra: ParamMap): MaxAbsScalerModel = {
val copied = new MaxAbsScalerModel(uid, maxAbs)
copyValues(copied, extra).setParent(parent)
@@ -139,7 +149,7 @@ class MaxAbsScalerModel private[ml] (
override def write: MLWriter = new MaxAbsScalerModelWriter(this)
}
-@Since("1.6.0")
+@Since("2.0.0")
object MaxAbsScalerModel extends MLReadable[MaxAbsScalerModel] {
private[MaxAbsScalerModel]
@@ -171,9 +181,9 @@ object MaxAbsScalerModel extends MLReadable[MaxAbsScalerModel] {
}
}
- @Since("1.6.0")
+ @Since("2.0.0")
override def read: MLReader[MaxAbsScalerModel] = new MaxAbsScalerModelReader
- @Since("1.6.0")
+ @Since("2.0.0")
override def load(path: String): MaxAbsScalerModel = super.load(path)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index a27bed5333..dd5a1f9b41 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -85,23 +85,29 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
* transformer will be DenseVector even for sparse input.
*/
@Experimental
-class MinMaxScaler(override val uid: String)
+@Since("1.5.0")
+class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[MinMaxScalerModel] with MinMaxScalerParams with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("minMaxScal"))
setDefault(min -> 0.0, max -> 1.0)
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setMin(value: Double): this.type = set(min, value)
/** @group setParam */
+ @Since("1.5.0")
def setMax(value: Double): this.type = set(max, value)
@Since("2.0.0")
@@ -114,10 +120,12 @@ class MinMaxScaler(override val uid: String)
copyValues(new MinMaxScalerModel(uid, summary.min, summary.max).setParent(this))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): MinMaxScaler = defaultCopy(extra)
}
@@ -138,24 +146,29 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] {
* TODO: The transformer does not yet set the metadata in the output column (SPARK-8529).
*/
@Experimental
+@Since("1.5.0")
class MinMaxScalerModel private[ml] (
- override val uid: String,
- val originalMin: Vector,
- val originalMax: Vector)
+ @Since("1.5.0") override val uid: String,
+ @Since("2.0.0") val originalMin: Vector,
+ @Since("2.0.0") val originalMax: Vector)
extends Model[MinMaxScalerModel] with MinMaxScalerParams with MLWritable {
import MinMaxScalerModel._
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setMin(value: Double): this.type = set(min, value)
/** @group setParam */
+ @Since("1.5.0")
def setMax(value: Double): this.type = set(max, value)
@Since("2.0.0")
@@ -181,10 +194,12 @@ class MinMaxScalerModel private[ml] (
dataset.withColumn($(outputCol), reScale(col($(inputCol))))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): MinMaxScalerModel = {
val copied = new MinMaxScalerModel(uid, originalMin, originalMax)
copyValues(copied, extra).setParent(parent)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
index f8bc7e3f0c..9c1f1ad443 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
* returned.
*/
@Experimental
-class NGram(override val uid: String)
+@Since("1.5.0")
+class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends UnaryTransformer[Seq[String], Seq[String], NGram] with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("ngram"))
/**
@@ -45,13 +47,16 @@ class NGram(override val uid: String)
* Default: 2, bigram features
* @group param
*/
+ @Since("1.5.0")
val n: IntParam = new IntParam(this, "n", "number elements per n-gram (>=1)",
ParamValidators.gtEq(1))
/** @group setParam */
+ @Since("1.5.0")
def setN(value: Int): this.type = set(n, value)
/** @group getParam */
+ @Since("1.5.0")
def getN: Int = $(n)
setDefault(n -> 2)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
index 942ac7ebdb..9a4e682890 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
@@ -31,9 +31,11 @@ import org.apache.spark.sql.types.DataType
* Normalize a vector to have unit norm using the given p-norm.
*/
@Experimental
-class Normalizer(override val uid: String)
+@Since("2.0.0")
+class Normalizer @Since("2.0.0") (@Since("2.0.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable {
+ @Since("2.0.0")
def this() = this(Identifiable.randomUID("normalizer"))
/**
@@ -41,14 +43,17 @@ class Normalizer(override val uid: String)
* (default: p = 2)
* @group param
*/
+ @Since("2.0.0")
val p = new DoubleParam(this, "p", "the p norm value", ParamValidators.gtEq(1))
setDefault(p -> 2.0)
/** @group getParam */
+ @Since("2.0.0")
def getP: Double = $(p)
/** @group setParam */
+ @Since("2.0.0")
def setP(value: Double): this.type = set(p, value)
override protected def createTransformFunc: Vector => Vector = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
index 3d1e6dd818..4fafc1e349 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
@@ -43,28 +43,35 @@ import org.apache.spark.sql.types.{DoubleType, NumericType, StructType}
* @see [[StringIndexer]] for converting categorical values into category indices
*/
@Experimental
-class OneHotEncoder(override val uid: String) extends Transformer
+@Since("1.4.0")
+class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer
with HasInputCol with HasOutputCol with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("oneHot"))
/**
* Whether to drop the last category in the encoded vector (default: true)
* @group param
*/
+ @Since("1.4.0")
final val dropLast: BooleanParam =
new BooleanParam(this, "dropLast", "whether to drop the last category")
setDefault(dropLast -> true)
/** @group setParam */
+ @Since("1.4.0")
def setDropLast(value: Boolean): this.type = set(dropLast, value)
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
val inputColName = $(inputCol)
val outputColName = $(outputCol)
@@ -168,6 +175,7 @@ class OneHotEncoder(override val uid: String) extends Transformer
dataset.select(col("*"), encode(col(inputColName).cast(DoubleType)).as(outputColName, metadata))
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): OneHotEncoder = defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 2f667af9d1..b89c85991f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -65,18 +65,24 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
* principal components.
*/
@Experimental
-class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams
- with DefaultParamsWritable {
+@Since("1.5.0")
+class PCA @Since("1.5.0") (
+ @Since("1.5.0") override val uid: String)
+ extends Estimator[PCAModel] with PCAParams with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("pca"))
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setK(value: Int): this.type = set(k, value)
/**
@@ -93,10 +99,12 @@ class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams
copyValues(new PCAModel(uid, pcaModel.pc, pcaModel.explainedVariance).setParent(this))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): PCA = defaultCopy(extra)
}
@@ -116,18 +124,21 @@ object PCA extends DefaultParamsReadable[PCA] {
* each principal component.
*/
@Experimental
+@Since("1.5.0")
class PCAModel private[ml] (
- override val uid: String,
- val pc: DenseMatrix,
- val explainedVariance: DenseVector)
+ @Since("1.5.0") override val uid: String,
+ @Since("2.0.0") val pc: DenseMatrix,
+ @Since("2.0.0") val explainedVariance: DenseVector)
extends Model[PCAModel] with PCAParams with MLWritable {
import PCAModel._
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/**
@@ -149,10 +160,12 @@ class PCAModel private[ml] (
dataset.withColumn($(outputCol), pcaOp(col($(inputCol))))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): PCAModel = {
val copied = new PCAModel(uid, pc, explainedVariance)
copyValues(copied, extra).setParent(parent)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index a01867701b..026014c7d6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types.DataType
* `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.
*/
@Experimental
-class PolynomialExpansion(override val uid: String)
+@Since("2.0.0")
+class PolynomialExpansion @Since("2.0.0") (@Since("2.0.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable {
+ @Since("2.0.0")
def this() = this(Identifiable.randomUID("poly"))
/**
@@ -45,15 +47,18 @@ class PolynomialExpansion(override val uid: String)
* Default: 2
* @group param
*/
+ @Since("2.0.0")
val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)",
ParamValidators.gtEq(1))
setDefault(degree -> 2)
/** @group getParam */
+ @Since("2.0.0")
def getDegree: Int = $(degree)
/** @group setParam */
+ @Since("2.0.0")
def setDegree(value: Int): this.type = set(degree, value)
override protected def createTransformFunc: Vector => Vector = { v =>
@@ -62,6 +67,7 @@ class PolynomialExpansion(override val uid: String)
override protected def outputDataType: DataType = new VectorUDT()
+ @Since("1.4.1")
override def copy(extra: ParamMap): PolynomialExpansion = defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
index 1fefaa1fdd..96b8e7d9f7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
@@ -74,23 +74,30 @@ private[feature] trait QuantileDiscretizerBase extends Params
* covering all real values.
*/
@Experimental
-final class QuantileDiscretizer(override val uid: String)
+@Since("1.6.0")
+final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val uid: String)
extends Estimator[Bucketizer] with QuantileDiscretizerBase with DefaultParamsWritable {
+ @Since("1.6.0")
def this() = this(Identifiable.randomUID("quantileDiscretizer"))
/** @group setParam */
+ @Since("2.0.0")
def setRelativeError(value: Double): this.type = set(relativeError, value)
/** @group setParam */
+ @Since("1.6.0")
def setNumBuckets(value: Int): this.type = set(numBuckets, value)
/** @group setParam */
+ @Since("1.6.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.6.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
+ @Since("1.6.0")
override def transformSchema(schema: StructType): StructType = {
SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType)
val inputFields = schema.fields
@@ -112,6 +119,7 @@ final class QuantileDiscretizer(override val uid: String)
copyValues(bucketizer.setParent(this))
}
+ @Since("1.6.0")
override def copy(extra: ParamMap): QuantileDiscretizer = defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
index a7ca0fe252..546dc7e8c0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
@@ -70,15 +70,18 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol {
* will be created from the specified response variable in the formula.
*/
@Experimental
-class RFormula(override val uid: String)
+@Since("1.5.0")
+class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[RFormulaModel] with RFormulaBase with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("rFormula"))
/**
* R formula parameter. The formula is provided in string form.
* @group param
*/
+ @Since("1.5.0")
val formula: Param[String] = new Param(this, "formula", "R model formula")
/**
@@ -86,15 +89,19 @@ class RFormula(override val uid: String)
* @group setParam
* @param value an R formula in string form (e.g. "y ~ x + z")
*/
+ @Since("1.5.0")
def setFormula(value: String): this.type = set(formula, value)
/** @group getParam */
+ @Since("1.5.0")
def getFormula: String = $(formula)
/** @group setParam */
+ @Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setLabelCol(value: String): this.type = set(labelCol, value)
/** Whether the formula specifies fitting an intercept. */
@@ -170,6 +177,7 @@ class RFormula(override val uid: String)
copyValues(new RFormulaModel(uid, resolvedFormula, pipelineModel).setParent(this))
}
+ @Since("1.5.0")
// optimistic schema; does not contain any ML attributes
override def transformSchema(schema: StructType): StructType = {
if (hasLabelCol(schema)) {
@@ -180,8 +188,10 @@ class RFormula(override val uid: String)
}
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): RFormula = defaultCopy(extra)
+ @Since("2.0.0")
override def toString: String = s"RFormula(${get(formula).getOrElse("")}) (uid=$uid)"
}
@@ -201,8 +211,9 @@ object RFormula extends DefaultParamsReadable[RFormula] {
* @param pipelineModel the fitted feature model, including factor to index mappings.
*/
@Experimental
+@Since("1.5.0")
class RFormulaModel private[feature](
- override val uid: String,
+ @Since("1.5.0") override val uid: String,
private[ml] val resolvedFormula: ResolvedRFormula,
private[ml] val pipelineModel: PipelineModel)
extends Model[RFormulaModel] with RFormulaBase with MLWritable {
@@ -213,6 +224,7 @@ class RFormulaModel private[feature](
transformLabel(pipelineModel.transform(dataset))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
checkCanTransform(schema)
val withFeatures = pipelineModel.transformSchema(schema)
@@ -231,9 +243,11 @@ class RFormulaModel private[feature](
}
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): RFormulaModel = copyValues(
new RFormulaModel(uid, resolvedFormula, pipelineModel))
+ @Since("2.0.0")
override def toString: String = s"RFormulaModel($resolvedFormula) (uid=$uid)"
private def transformLabel(dataset: Dataset[_]): DataFrame = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
index bd8f9494fb..b8715746fe 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
@@ -39,7 +39,7 @@ import org.apache.spark.sql.types.StructType
*/
@Experimental
@Since("1.6.0")
-class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transformer
+class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer
with DefaultParamsWritable {
@Since("1.6.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 7cec369c23..5e1bacf876 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -85,21 +85,28 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
* which is computed as the square root of the unbiased sample variance.
*/
@Experimental
-class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel]
- with StandardScalerParams with DefaultParamsWritable {
+@Since("1.2.0")
+class StandardScaler @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String)
+ extends Estimator[StandardScalerModel] with StandardScalerParams with DefaultParamsWritable {
+ @Since("1.2.0")
def this() = this(Identifiable.randomUID("stdScal"))
/** @group setParam */
+ @Since("1.2.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.2.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setWithMean(value: Boolean): this.type = set(withMean, value)
/** @group setParam */
+ @Since("1.4.0")
def setWithStd(value: Boolean): this.type = set(withStd, value)
@Since("2.0.0")
@@ -113,10 +120,12 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM
copyValues(new StandardScalerModel(uid, scalerModel.std, scalerModel.mean).setParent(this))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): StandardScaler = defaultCopy(extra)
}
@@ -135,18 +144,21 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] {
* @param mean Mean of the StandardScalerModel
*/
@Experimental
+@Since("1.2.0")
class StandardScalerModel private[ml] (
- override val uid: String,
- val std: Vector,
- val mean: Vector)
+ @Since("1.4.0") override val uid: String,
+ @Since("2.0.0") val std: Vector,
+ @Since("2.0.0") val mean: Vector)
extends Model[StandardScalerModel] with StandardScalerParams with MLWritable {
import StandardScalerModel._
/** @group setParam */
+ @Since("1.2.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.2.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -161,10 +173,12 @@ class StandardScalerModel private[ml] (
dataset.withColumn($(outputCol), scale(col($(inputCol))))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): StandardScalerModel = {
val copied = new StandardScalerModel(uid, std, mean)
copyValues(copied, extra).setParent(parent)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 11864cb8f4..1a6f42f773 100755
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -33,15 +33,19 @@ import org.apache.spark.sql.types.{ArrayType, StringType, StructType}
* @see [[http://en.wikipedia.org/wiki/Stop_words]]
*/
@Experimental
-class StopWordsRemover(override val uid: String)
+@Since("1.5.0")
+class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("stopWords"))
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/**
@@ -50,13 +54,16 @@ class StopWordsRemover(override val uid: String)
* @see [[StopWordsRemover.loadDefaultStopWords()]]
* @group param
*/
+ @Since("1.5.0")
val stopWords: StringArrayParam =
new StringArrayParam(this, "stopWords", "the words to be filtered out")
/** @group setParam */
+ @Since("1.5.0")
def setStopWords(value: Array[String]): this.type = set(stopWords, value)
/** @group getParam */
+ @Since("1.5.0")
def getStopWords: Array[String] = $(stopWords)
/**
@@ -64,13 +71,16 @@ class StopWordsRemover(override val uid: String)
* Default: false
* @group param
*/
+ @Since("1.5.0")
val caseSensitive: BooleanParam = new BooleanParam(this, "caseSensitive",
"whether to do a case-sensitive comparison over the stop words")
/** @group setParam */
+ @Since("1.5.0")
def setCaseSensitive(value: Boolean): this.type = set(caseSensitive, value)
/** @group getParam */
+ @Since("1.5.0")
def getCaseSensitive: Boolean = $(caseSensitive)
setDefault(stopWords -> StopWordsRemover.loadDefaultStopWords("english"), caseSensitive -> false)
@@ -95,6 +105,7 @@ class StopWordsRemover(override val uid: String)
dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata))
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
val inputType = schema($(inputCol)).dataType
require(inputType.sameType(ArrayType(StringType)),
@@ -102,6 +113,7 @@ class StopWordsRemover(override val uid: String)
SchemaUtils.appendColumn(schema, $(outputCol), inputType, schema($(inputCol)).nullable)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): StopWordsRemover = defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index cc0571fd7e..0f7337ce6b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -64,22 +64,27 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha
* @see [[IndexToString]] for the inverse transformation
*/
@Experimental
-class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel]
+@Since("1.4.0")
+class StringIndexer @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel]
with StringIndexerBase with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("strIdx"))
/** @group setParam */
+ @Since("1.6.0")
def setHandleInvalid(value: String): this.type = set(handleInvalid, value)
setDefault(handleInvalid, "error")
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
-
@Since("2.0.0")
override def fit(dataset: Dataset[_]): StringIndexerModel = {
val counts = dataset.select(col($(inputCol)).cast(StringType))
@@ -90,10 +95,12 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
copyValues(new StringIndexerModel(uid, labels).setParent(this))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): StringIndexer = defaultCopy(extra)
}
@@ -115,13 +122,15 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] {
* @param labels Ordered list of labels, corresponding to indices to be assigned.
*/
@Experimental
+@Since("1.4.0")
class StringIndexerModel (
- override val uid: String,
- val labels: Array[String])
+ @Since("1.4.0") override val uid: String,
+ @Since("1.5.0") val labels: Array[String])
extends Model[StringIndexerModel] with StringIndexerBase with MLWritable {
import StringIndexerModel._
+ @Since("1.5.0")
def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels)
private val labelToIndex: OpenHashMap[String, Double] = {
@@ -136,13 +145,16 @@ class StringIndexerModel (
}
/** @group setParam */
+ @Since("1.6.0")
def setHandleInvalid(value: String): this.type = set(handleInvalid, value)
setDefault(handleInvalid, "error")
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -177,6 +189,7 @@ class StringIndexerModel (
indexer(dataset($(inputCol)).cast(StringType)).as($(outputCol), metadata))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
if (schema.fieldNames.contains($(inputCol))) {
validateAndTransformSchema(schema)
@@ -186,6 +199,7 @@ class StringIndexerModel (
}
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): StringIndexerModel = {
val copied = new StringIndexerModel(uid, labels)
copyValues(copied, extra).setParent(parent)
@@ -245,19 +259,24 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] {
* @see [[StringIndexer]] for converting strings into indices
*/
@Experimental
-class IndexToString private[ml] (override val uid: String)
+@Since("1.5.0")
+class IndexToString private[ml] (@Since("1.5.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
+ @Since("1.5.0")
def this() =
this(Identifiable.randomUID("idxToStr"))
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setLabels(value: Array[String]): this.type = set(labels, value)
/**
@@ -266,13 +285,16 @@ class IndexToString private[ml] (override val uid: String)
* Default: Not specified, in which case [[inputCol]] metadata is used for labels.
* @group param
*/
+ @Since("1.5.0")
final val labels: StringArrayParam = new StringArrayParam(this, "labels",
"Optional array of labels specifying index-string mapping." +
" If not provided or if empty, then metadata from inputCol is used instead.")
/** @group getParam */
+ @Since("1.5.0")
final def getLabels: Array[String] = $(labels)
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
val inputColName = $(inputCol)
val inputDataType = schema(inputColName).dataType
@@ -310,6 +332,7 @@ class IndexToString private[ml] (override val uid: String)
indexer(dataset($(inputCol)).cast(DoubleType)).as(outputColName))
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): IndexToString = {
defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 8456a0e915..010c948749 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -30,9 +30,11 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
* @see [[RegexTokenizer]]
*/
@Experimental
-class Tokenizer(override val uid: String)
+@Since("1.2.0")
+class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable {
+ @Since("1.2.0")
def this() = this(Identifiable.randomUID("tok"))
override protected def createTransformFunc: String => Seq[String] = {
@@ -45,6 +47,7 @@ class Tokenizer(override val uid: String)
override protected def outputDataType: DataType = new ArrayType(StringType, true)
+ @Since("1.4.1")
override def copy(extra: ParamMap): Tokenizer = defaultCopy(extra)
}
@@ -63,9 +66,11 @@ object Tokenizer extends DefaultParamsReadable[Tokenizer] {
* It returns an array of strings that can be empty.
*/
@Experimental
-class RegexTokenizer(override val uid: String)
+@Since("1.4.0")
+class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends UnaryTransformer[String, Seq[String], RegexTokenizer] with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("regexTok"))
/**
@@ -73,13 +78,16 @@ class RegexTokenizer(override val uid: String)
* Default: 1, to avoid returning empty strings
* @group param
*/
+ @Since("1.4.0")
val minTokenLength: IntParam = new IntParam(this, "minTokenLength", "minimum token length (>= 0)",
ParamValidators.gtEq(0))
/** @group setParam */
+ @Since("1.4.0")
def setMinTokenLength(value: Int): this.type = set(minTokenLength, value)
/** @group getParam */
+ @Since("1.4.0")
def getMinTokenLength: Int = $(minTokenLength)
/**
@@ -87,12 +95,15 @@ class RegexTokenizer(override val uid: String)
* Default: true
* @group param
*/
+ @Since("1.4.0")
val gaps: BooleanParam = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens")
/** @group setParam */
+ @Since("1.4.0")
def setGaps(value: Boolean): this.type = set(gaps, value)
/** @group getParam */
+ @Since("1.4.0")
def getGaps: Boolean = $(gaps)
/**
@@ -100,12 +111,15 @@ class RegexTokenizer(override val uid: String)
* Default: `"\\s+"`
* @group param
*/
+ @Since("1.4.0")
val pattern: Param[String] = new Param(this, "pattern", "regex pattern used for tokenizing")
/** @group setParam */
+ @Since("1.4.0")
def setPattern(value: String): this.type = set(pattern, value)
/** @group getParam */
+ @Since("1.4.0")
def getPattern: String = $(pattern)
/**
@@ -113,13 +127,16 @@ class RegexTokenizer(override val uid: String)
* Default: true
* @group param
*/
+ @Since("1.6.0")
final val toLowercase: BooleanParam = new BooleanParam(this, "toLowercase",
"whether to convert all characters to lowercase before tokenizing.")
/** @group setParam */
+ @Since("1.6.0")
def setToLowercase(value: Boolean): this.type = set(toLowercase, value)
/** @group getParam */
+ @Since("1.6.0")
def getToLowercase: Boolean = $(toLowercase)
setDefault(minTokenLength -> 1, gaps -> true, pattern -> "\\s+", toLowercase -> true)
@@ -138,6 +155,7 @@ class RegexTokenizer(override val uid: String)
override protected def outputDataType: DataType = new ArrayType(StringType, true)
+ @Since("1.4.1")
override def copy(extra: ParamMap): RegexTokenizer = defaultCopy(extra)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index 1bc24202b7..4939dabd98 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -36,15 +36,19 @@ import org.apache.spark.sql.types._
* A feature transformer that merges multiple columns into a vector column.
*/
@Experimental
-class VectorAssembler(override val uid: String)
+@Since("1.4.0")
+class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("vecAssembler"))
/** @group setParam */
+ @Since("1.4.0")
def setInputCols(value: Array[String]): this.type = set(inputCols, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -106,6 +110,7 @@ class VectorAssembler(override val uid: String)
dataset.select(col("*"), assembleFunc(struct(args: _*)).as($(outputCol), metadata))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
val inputColNames = $(inputCols)
val outputColName = $(outputCol)
@@ -122,6 +127,7 @@ class VectorAssembler(override val uid: String)
StructType(schema.fields :+ new StructField(outputColName, new VectorUDT, true))
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): VectorAssembler = defaultCopy(extra)
}
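As described above, VectorAssembler merges multiple columns into a single vector column; a minimal sketch of the newly annotated setters in use, assuming a SparkSession named `spark` and illustrative column names:

```scala
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.linalg.Vectors

// Assumes a SparkSession named `spark`; columns mix scalars and a vector.
val df = spark.createDataFrame(Seq(
  (0, 18.0, 1.0, Vectors.dense(0.0, 10.0, 0.5))
)).toDF("id", "hour", "clicked", "userFeatures")

val assembler = new VectorAssembler()
  .setInputCols(Array("hour", "clicked", "userFeatures"))
  .setOutputCol("features")

assembler.transform(df).select("features").show(truncate = false)
```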
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index d814528ec4..52db996c84 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -94,18 +94,24 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
* - Add option for allowing unknown categories.
*/
@Experimental
-class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel]
- with VectorIndexerParams with DefaultParamsWritable {
+@Since("1.4.0")
+class VectorIndexer @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String)
+ extends Estimator[VectorIndexerModel] with VectorIndexerParams with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("vecIdx"))
/** @group setParam */
+ @Since("1.4.0")
def setMaxCategories(value: Int): this.type = set(maxCategories, value)
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -126,6 +132,7 @@ class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerMod
copyValues(model)
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
// We do not transfer feature metadata since we do not know what types of features we will
// produce in transform().
@@ -136,6 +143,7 @@ class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerMod
SchemaUtils.appendColumn(schema, $(outputCol), dataType)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): VectorIndexer = defaultCopy(extra)
}
@@ -256,15 +264,17 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] {
* If a feature is not in this map, it is treated as continuous.
*/
@Experimental
+@Since("1.4.0")
class VectorIndexerModel private[ml] (
- override val uid: String,
- val numFeatures: Int,
- val categoryMaps: Map[Int, Map[Double, Int]])
+ @Since("1.4.0") override val uid: String,
+ @Since("1.4.0") val numFeatures: Int,
+ @Since("1.4.0") val categoryMaps: Map[Int, Map[Double, Int]])
extends Model[VectorIndexerModel] with VectorIndexerParams with MLWritable {
import VectorIndexerModel._
/** Java-friendly version of [[categoryMaps]] */
+ @Since("1.4.0")
def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = {
categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]]
}
@@ -342,9 +352,11 @@ class VectorIndexerModel private[ml] (
}
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -356,6 +368,7 @@ class VectorIndexerModel private[ml] (
dataset.withColumn($(outputCol), newCol, newField.metadata)
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
val dataType = new VectorUDT
require(isDefined(inputCol),
@@ -415,6 +428,7 @@ class VectorIndexerModel private[ml] (
newAttributeGroup.toStructField()
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): VectorIndexerModel = {
val copied = new VectorIndexerModel(uid, numFeatures, categoryMaps)
copyValues(copied, extra).setParent(parent)
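A minimal sketch of the VectorIndexer / VectorIndexerModel API annotated above, assuming a DataFrame `data` with a vector column named "features" (both the frame and the column name are assumptions, not part of the patch):

```scala
import org.apache.spark.ml.feature.VectorIndexer

// Assumes a DataFrame `data` with a vector column "features".
val indexer = new VectorIndexer()
  .setInputCol("features")
  .setOutputCol("indexedFeatures")
  .setMaxCategories(10)   // features with <= 10 distinct values are treated as categorical

val indexerModel = indexer.fit(data)

// categoryMaps (annotated above) reports which features were indexed as categorical.
println(s"Categorical features: ${indexerModel.categoryMaps.keys.toSeq.sorted.mkString(", ")}")

val indexed = indexerModel.transform(data)
```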
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
index 103738cd91..6769e490c5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
@@ -41,9 +41,11 @@ import org.apache.spark.sql.types.StructType
* followed by the selected names (in the order given).
*/
@Experimental
-final class VectorSlicer(override val uid: String)
+@Since("1.5.0")
+final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
+ @Since("1.5.0")
def this() = this(Identifiable.randomUID("vectorSlicer"))
/**
@@ -52,6 +54,7 @@ final class VectorSlicer(override val uid: String)
* Default: Empty array
* @group param
*/
+ @Since("1.5.0")
val indices = new IntArrayParam(this, "indices",
"An array of indices to select features from a vector column." +
" There can be no overlap with names.", VectorSlicer.validIndices)
@@ -59,9 +62,11 @@ final class VectorSlicer(override val uid: String)
setDefault(indices -> Array.empty[Int])
/** @group getParam */
+ @Since("1.5.0")
def getIndices: Array[Int] = $(indices)
/** @group setParam */
+ @Since("1.5.0")
def setIndices(value: Array[Int]): this.type = set(indices, value)
/**
@@ -71,6 +76,7 @@ final class VectorSlicer(override val uid: String)
* Default: Empty Array
* @group param
*/
+ @Since("1.5.0")
val names = new StringArrayParam(this, "names",
"An array of feature names to select features from a vector column." +
" There can be no overlap with indices.", VectorSlicer.validNames)
@@ -78,15 +84,19 @@ final class VectorSlicer(override val uid: String)
setDefault(names -> Array.empty[String])
/** @group getParam */
+ @Since("1.5.0")
def getNames: Array[String] = $(names)
/** @group setParam */
+ @Since("1.5.0")
def setNames(value: Array[String]): this.type = set(names, value)
/** @group setParam */
+ @Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
@Since("2.0.0")
@@ -134,6 +144,7 @@ final class VectorSlicer(override val uid: String)
indFeatures ++ nameFeatures
}
+ @Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
require($(indices).length > 0 || $(names).length > 0,
s"VectorSlicer requires that at least one feature be selected.")
@@ -148,6 +159,7 @@ final class VectorSlicer(override val uid: String)
StructType(outputFields)
}
+ @Since("1.5.0")
override def copy(extra: ParamMap): VectorSlicer = defaultCopy(extra)
}
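The indices and names params documented above are the two ways to select features; a minimal sketch, assuming a DataFrame `df` whose "userFeatures" vector column carries attribute names in its metadata (the column and attribute names are illustrative):

```scala
import org.apache.spark.ml.feature.VectorSlicer

// Assumes a DataFrame `df` with a vector column "userFeatures" whose
// attributes are named in the column metadata.
val slicer = new VectorSlicer()
  .setInputCol("userFeatures")
  .setOutputCol("selectedFeatures")
  .setIndices(Array(1))      // select by position...
  .setNames(Array("f3"))     // ...and/or by attribute name (no overlap allowed)

val sliced = slicer.transform(df)
sliced.select("selectedFeatures").show(truncate = false)
```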
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index 33515b2240..05c4f2f1a7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -120,39 +120,52 @@ private[feature] trait Word2VecBase extends Params
* natural language processing or machine learning process.
*/
@Experimental
-final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase
- with DefaultParamsWritable {
+@Since("1.4.0")
+final class Word2Vec @Since("1.4.0") (
+ @Since("1.4.0") override val uid: String)
+ extends Estimator[Word2VecModel] with Word2VecBase with DefaultParamsWritable {
+ @Since("1.4.0")
def this() = this(Identifiable.randomUID("w2v"))
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setVectorSize(value: Int): this.type = set(vectorSize, value)
/** @group expertSetParam */
+ @Since("1.6.0")
def setWindowSize(value: Int): this.type = set(windowSize, value)
/** @group setParam */
+ @Since("1.4.0")
def setStepSize(value: Double): this.type = set(stepSize, value)
/** @group setParam */
+ @Since("1.4.0")
def setNumPartitions(value: Int): this.type = set(numPartitions, value)
/** @group setParam */
+ @Since("1.4.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
/** @group setParam */
+ @Since("1.4.0")
def setSeed(value: Long): this.type = set(seed, value)
/** @group setParam */
+ @Since("1.4.0")
def setMinCount(value: Int): this.type = set(minCount, value)
/** @group setParam */
+ @Since("2.0.0")
def setMaxSentenceLength(value: Int): this.type = set(maxSentenceLength, value)
@Since("2.0.0")
@@ -172,10 +185,12 @@ final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel]
copyValues(new Word2VecModel(uid, wordVectors).setParent(this))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): Word2Vec = defaultCopy(extra)
}
@@ -191,8 +206,9 @@ object Word2Vec extends DefaultParamsReadable[Word2Vec] {
* Model fitted by [[Word2Vec]].
*/
@Experimental
+@Since("1.4.0")
class Word2VecModel private[ml] (
- override val uid: String,
+ @Since("1.4.0") override val uid: String,
@transient private val wordVectors: feature.Word2VecModel)
extends Model[Word2VecModel] with Word2VecBase with MLWritable {
@@ -202,6 +218,7 @@ class Word2VecModel private[ml] (
   * Returns a dataframe with two fields, "word" and "vector", with "word" being a String and
   * the vector the DenseVector that it is mapped to.
*/
+ @Since("1.5.0")
@transient lazy val getVectors: DataFrame = {
val spark = SparkSession.builder().getOrCreate()
val wordVec = wordVectors.getVectors.mapValues(vec => Vectors.dense(vec.map(_.toDouble)))
@@ -213,6 +230,7 @@ class Word2VecModel private[ml] (
* Returns a dataframe with the words and the cosine similarities between the
* synonyms and the given word.
*/
+ @Since("1.5.0")
def findSynonyms(word: String, num: Int): DataFrame = {
findSynonyms(wordVectors.transform(word), num)
}
@@ -222,15 +240,18 @@ class Word2VecModel private[ml] (
* of the word. Returns a dataframe with the words and the cosine similarities between the
* synonyms and the given word vector.
*/
+ @Since("1.5.0")
def findSynonyms(word: Vector, num: Int): DataFrame = {
val spark = SparkSession.builder().getOrCreate()
spark.createDataFrame(wordVectors.findSynonyms(word, num)).toDF("word", "similarity")
}
/** @group setParam */
+ @Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)
/** @group setParam */
+ @Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)
/**
@@ -262,10 +283,12 @@ class Word2VecModel private[ml] (
dataset.withColumn($(outputCol), word2Vec(col($(inputCol))))
}
+ @Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema)
}
+ @Since("1.4.1")
override def copy(extra: ParamMap): Word2VecModel = {
val copied = new Word2VecModel(uid, wordVectors)
copyValues(copied, extra).setParent(parent)
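A minimal sketch of the Word2Vec / Word2VecModel members annotated above (the setters, getVectors and findSynonyms), assuming a DataFrame `docs` with a "text" column of token sequences (the frame and column name are illustrative):

```scala
import org.apache.spark.ml.feature.Word2Vec

// Assumes a DataFrame `docs` with a Seq[String] column named "text".
val word2Vec = new Word2Vec()
  .setInputCol("text")
  .setOutputCol("result")
  .setVectorSize(3)
  .setMinCount(0)

val model = word2Vec.fit(docs)

// getVectors and findSynonyms are the members annotated @Since("1.5.0") above;
// findSynonyms only works for words present in the fitted vocabulary.
model.getVectors.show()
model.findSynonyms("spark", 2).show()
```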
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index a28764a752..1e9ec0fbb4 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -149,7 +149,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
>>> loadedBucketizer.getSplits() == bucketizer.getSplits()
True
- .. versionadded:: 1.3.0
+ .. versionadded:: 1.4.0
"""
splits = \
@@ -486,14 +486,14 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
- @since("1.5.0")
+ @since("2.0.0")
def setScalingVec(self, value):
"""
Sets the value of :py:attr:`scalingVec`.
"""
return self._set(scalingVec=value)
- @since("1.5.0")
+ @since("2.0.0")
def getScalingVec(self):
"""
Gets the value of scalingVec or its default value.
@@ -1584,7 +1584,7 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
@property
- @since("1.5.0")
+ @since("2.0.0")
def std(self):
"""
Standard deviation of the StandardScalerModel.
@@ -1592,7 +1592,7 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
return self._call_java("std")
@property
- @since("1.5.0")
+ @since("2.0.0")
def mean(self):
"""
Mean of the StandardScalerModel.