about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/feature.py
diff options
context:
space:
mode:
author: lihao <lihaowhu@gmail.com> 2015-11-02 16:09:22 -0800
committer: Xiangrui Meng <meng@databricks.com> 2015-11-02 16:09:22 -0800
commit: ecfb3e73fd0a99f0be96034710974e78b6f9d624 (patch)
tree: 7a7b2d6d2052aa0ef524ad65ccaab77b4b6289b4 /python/pyspark/ml/feature.py
parent: 2804674a7af8f11eeb1280459bc9145815398eed (diff)
download: spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.gz
download: spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.bz2
download: spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.zip
[SPARK-10286][ML][PYSPARK][DOCS] Add @since annotation to pyspark.ml.param and pyspark.ml.*
Author: lihao <lihaowhu@gmail.com>
Closes #9275 from lidinghao/SPARK-10286.
Diffstat (limited to 'python/pyspark/ml/feature.py')
-rw-r--r-- python/pyspark/ml/feature.py 164
1 files changed, 164 insertions, 0 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 55bde6d0ea..c7b6dd926c 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -19,6 +19,7 @@ import sys
if sys.version > '3':
basestring = str
+from pyspark import since
from pyspark.rdd import ignore_unicode_prefix
from pyspark.ml.param.shared import *
from pyspark.ml.util import keyword_only
@@ -51,6 +52,8 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
>>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"}
>>> binarizer.transform(df, params).head().vector
1.0
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -71,6 +74,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, threshold=0.0, inputCol=None, outputCol=None):
"""
setParams(self, threshold=0.0, inputCol=None, outputCol=None)
@@ -79,6 +83,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setThreshold(self, value):
"""
Sets the value of :py:attr:`threshold`.
@@ -86,6 +91,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.threshold] = value
return self
+ @since("1.4.0")
def getThreshold(self):
"""
Gets the value of threshold or its default value.
@@ -114,6 +120,8 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
2.0
>>> bucketizer.setParams(outputCol="b").transform(df).head().b
0.0
+
+ .. versionadded:: 1.3.0
"""
# a placeholder to make it appear in the generated doc
@@ -150,6 +158,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, splits=None, inputCol=None, outputCol=None):
"""
setParams(self, splits=None, inputCol=None, outputCol=None)
@@ -158,6 +167,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setSplits(self, value):
"""
Sets the value of :py:attr:`splits`.
@@ -165,6 +175,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.splits] = value
return self
+ @since("1.4.0")
def getSplits(self):
"""
Gets the value of threshold or its default value.
@@ -194,6 +205,8 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
...
>>> sorted(map(str, model.vocabulary))
['a', 'b', 'c']
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -242,6 +255,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None):
"""
setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None)
@@ -250,6 +264,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setMinTF(self, value):
"""
Sets the value of :py:attr:`minTF`.
@@ -257,12 +272,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.minTF] = value
return self
+ @since("1.6.0")
def getMinTF(self):
"""
Gets the value of minTF or its default value.
"""
return self.getOrDefault(self.minTF)
+ @since("1.6.0")
def setMinDF(self, value):
"""
Sets the value of :py:attr:`minDF`.
@@ -270,12 +287,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.minDF] = value
return self
+ @since("1.6.0")
def getMinDF(self):
"""
Gets the value of minDF or its default value.
"""
return self.getOrDefault(self.minDF)
+ @since("1.6.0")
def setVocabSize(self, value):
"""
Sets the value of :py:attr:`vocabSize`.
@@ -283,6 +302,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.vocabSize] = value
return self
+ @since("1.6.0")
def getVocabSize(self):
"""
Gets the value of vocabSize or its default value.
@@ -298,9 +318,12 @@ class CountVectorizerModel(JavaModel):
.. note:: Experimental
Model fitted by CountVectorizer.
+
+ .. versionadded:: 1.6.0
"""
@property
+ @since("1.6.0")
def vocabulary(self):
"""
An array of terms in the vocabulary.
@@ -331,6 +354,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
>>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2)
>>> df3.head().origVec
DenseVector([5.0, 8.0, 6.0])
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -351,6 +376,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inverse=False, inputCol=None, outputCol=None):
"""
setParams(self, inverse=False, inputCol=None, outputCol=None)
@@ -359,6 +385,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setInverse(self, value):
"""
Sets the value of :py:attr:`inverse`.
@@ -366,6 +393,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.inverse] = value
return self
+ @since("1.6.0")
def getInverse(self):
"""
Gets the value of inverse or its default value.
@@ -390,6 +418,8 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
DenseVector([2.0, 2.0, 9.0])
>>> ep.setParams(scalingVec=Vectors.dense([2.0, 3.0, 5.0])).transform(df).head().eprod
DenseVector([4.0, 3.0, 15.0])
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -410,6 +440,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, scalingVec=None, inputCol=None, outputCol=None):
"""
setParams(self, scalingVec=None, inputCol=None, outputCol=None)
@@ -418,6 +449,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setScalingVec(self, value):
"""
Sets the value of :py:attr:`scalingVec`.
@@ -425,6 +457,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.scalingVec] = value
return self
+ @since("1.5.0")
def getScalingVec(self):
"""
Gets the value of scalingVec or its default value.
@@ -449,6 +482,8 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
>>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"}
>>> hashingTF.transform(df, params).head().vector
SparseVector(5, {2: 1.0, 3: 1.0, 4: 1.0})
+
+ .. versionadded:: 1.3.0
"""
@keyword_only
@@ -463,6 +498,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
self.setParams(**kwargs)
@keyword_only
+ @since("1.3.0")
def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None):
"""
setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None)
@@ -490,6 +526,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
>>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"}
>>> idf.fit(df, params).transform(df).head().vector
DenseVector([0.2877, 0.0])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -510,6 +548,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, minDocFreq=0, inputCol=None, outputCol=None):
"""
setParams(self, minDocFreq=0, inputCol=None, outputCol=None)
@@ -518,6 +557,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setMinDocFreq(self, value):
"""
Sets the value of :py:attr:`minDocFreq`.
@@ -525,6 +565,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.minDocFreq] = value
return self
+ @since("1.4.0")
def getMinDocFreq(self):
"""
Gets the value of minDocFreq or its default value.
@@ -540,6 +581,8 @@ class IDFModel(JavaModel):
.. note:: Experimental
Model fitted by IDF.
+
+ .. versionadded:: 1.4.0
"""
@@ -571,6 +614,8 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
|[2.0]| [1.0]|
+-----+------+
...
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -591,6 +636,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None):
"""
setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None)
@@ -599,6 +645,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setMin(self, value):
"""
Sets the value of :py:attr:`min`.
@@ -606,12 +653,14 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.min] = value
return self
+ @since("1.6.0")
def getMin(self):
"""
Gets the value of min or its default value.
"""
return self.getOrDefault(self.min)
+ @since("1.6.0")
def setMax(self, value):
"""
Sets the value of :py:attr:`max`.
@@ -619,6 +668,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.max] = value
return self
+ @since("1.6.0")
def getMax(self):
"""
Gets the value of max or its default value.
@@ -634,6 +684,8 @@ class MinMaxScalerModel(JavaModel):
.. note:: Experimental
Model fitted by :py:class:`MinMaxScaler`.
+
+ .. versionadded:: 1.6.0
"""
@@ -668,6 +720,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
Traceback (most recent call last):
...
TypeError: Method setParams forces keyword arguments.
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -686,6 +740,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, n=2, inputCol=None, outputCol=None):
"""
setParams(self, n=2, inputCol=None, outputCol=None)
@@ -694,6 +749,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setN(self, value):
"""
Sets the value of :py:attr:`n`.
@@ -701,6 +757,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.n] = value
return self
+ @since("1.5.0")
def getN(self):
"""
Gets the value of n or its default value.
@@ -726,6 +783,8 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
>>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"}
>>> normalizer.transform(df, params).head().vector
DenseVector([0.4286, -0.5714])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -744,6 +803,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, p=2.0, inputCol=None, outputCol=None):
"""
setParams(self, p=2.0, inputCol=None, outputCol=None)
@@ -752,6 +812,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setP(self, value):
"""
Sets the value of :py:attr:`p`.
@@ -759,6 +820,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.p] = value
return self
+ @since("1.4.0")
def getP(self):
"""
Gets the value of p or its default value.
@@ -800,6 +862,8 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
>>> params = {encoder.dropLast: False, encoder.outputCol: "test"}
>>> encoder.transform(td, params).head().test
SparseVector(3, {0: 1.0})
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -818,6 +882,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, dropLast=True, inputCol=None, outputCol=None):
"""
setParams(self, dropLast=True, inputCol=None, outputCol=None)
@@ -826,6 +891,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setDropLast(self, value):
"""
Sets the value of :py:attr:`dropLast`.
@@ -833,6 +899,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.dropLast] = value
return self
+ @since("1.4.0")
def getDropLast(self):
"""
Gets the value of dropLast or its default value.
@@ -858,6 +925,8 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
>>> px.setParams(outputCol="test").transform(df).head().test
DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -877,6 +946,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, degree=2, inputCol=None, outputCol=None):
"""
setParams(self, degree=2, inputCol=None, outputCol=None)
@@ -885,6 +955,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setDegree(self, value):
"""
Sets the value of :py:attr:`degree`.
@@ -892,6 +963,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.degree] = value
return self
+ @since("1.4.0")
def getDegree(self):
"""
Gets the value of degree or its default value.
@@ -929,6 +1001,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
Traceback (most recent call last):
...
TypeError: Method setParams forces keyword arguments.
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -951,6 +1025,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None):
"""
setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None)
@@ -959,6 +1034,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setMinTokenLength(self, value):
"""
Sets the value of :py:attr:`minTokenLength`.
@@ -966,12 +1042,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.minTokenLength] = value
return self
+ @since("1.4.0")
def getMinTokenLength(self):
"""
Gets the value of minTokenLength or its default value.
"""
return self.getOrDefault(self.minTokenLength)
+ @since("1.4.0")
def setGaps(self, value):
"""
Sets the value of :py:attr:`gaps`.
@@ -979,12 +1057,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.gaps] = value
return self
+ @since("1.4.0")
def getGaps(self):
"""
Gets the value of gaps or its default value.
"""
return self.getOrDefault(self.gaps)
+ @since("1.4.0")
def setPattern(self, value):
"""
Sets the value of :py:attr:`pattern`.
@@ -992,6 +1072,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.pattern] = value
return self
+ @since("1.4.0")
def getPattern(self):
"""
Gets the value of pattern or its default value.
@@ -1013,6 +1094,8 @@ class SQLTransformer(JavaTransformer):
... statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
>>> sqlTrans.transform(df).head()
Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0)
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -1030,6 +1113,7 @@ class SQLTransformer(JavaTransformer):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, statement=None):
"""
setParams(self, statement=None)
@@ -1038,6 +1122,7 @@ class SQLTransformer(JavaTransformer):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setStatement(self, value):
"""
Sets the value of :py:attr:`statement`.
@@ -1045,6 +1130,7 @@ class SQLTransformer(JavaTransformer):
self._paramMap[self.statement] = value
return self
+ @since("1.6.0")
def getStatement(self):
"""
Gets the value of statement or its default value.
@@ -1070,6 +1156,8 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
DenseVector([1.4142])
>>> model.transform(df).collect()[1].scaled
DenseVector([1.4142])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -1090,6 +1178,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None):
"""
setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None)
@@ -1098,6 +1187,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setWithMean(self, value):
"""
Sets the value of :py:attr:`withMean`.
@@ -1105,12 +1195,14 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.withMean] = value
return self
+ @since("1.4.0")
def getWithMean(self):
"""
Gets the value of withMean or its default value.
"""
return self.getOrDefault(self.withMean)
+ @since("1.4.0")
def setWithStd(self, value):
"""
Sets the value of :py:attr:`withStd`.
@@ -1118,6 +1210,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.withStd] = value
return self
+ @since("1.4.0")
def getWithStd(self):
"""
Gets the value of withStd or its default value.
@@ -1133,9 +1226,12 @@ class StandardScalerModel(JavaModel):
.. note:: Experimental
Model fitted by StandardScaler.
+
+ .. versionadded:: 1.4.0
"""
@property
+ @since("1.5.0")
def std(self):
"""
Standard deviation of the StandardScalerModel.
@@ -1143,6 +1239,7 @@ class StandardScalerModel(JavaModel):
return self._call_java("std")
@property
+ @since("1.5.0")
def mean(self):
"""
Mean of the StandardScalerModel.
@@ -1171,6 +1268,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid):
>>> sorted(set([(i[0], str(i[1])) for i in itd.select(itd.id, itd.label2).collect()]),
... key=lambda x: x[0])
[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]
+
+ .. versionadded:: 1.4.0
"""
@keyword_only
@@ -1185,6 +1284,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, inputCol=None, outputCol=None, handleInvalid="error"):
"""
setParams(self, inputCol=None, outputCol=None, handleInvalid="error")
@@ -1202,8 +1302,11 @@ class StringIndexerModel(JavaModel):
.. note:: Experimental
Model fitted by StringIndexer.
+
+ .. versionadded:: 1.4.0
"""
@property
+ @since("1.5.0")
def labels(self):
"""
Ordered list of labels, corresponding to indices to be assigned.
@@ -1221,6 +1324,8 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
The index-string mapping is either from the ML attributes of the input column,
or from user-supplied labels (which take precedence over ML attributes).
See L{StringIndexer} for converting strings into indices.
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make the labels show up in generated doc
@@ -1243,6 +1348,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inputCol=None, outputCol=None, labels=None):
"""
setParams(self, inputCol=None, outputCol=None, labels=None)
@@ -1251,6 +1357,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setLabels(self, value):
"""
Sets the value of :py:attr:`labels`.
@@ -1258,6 +1365,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.labels] = value
return self
+ @since("1.6.0")
def getLabels(self):
"""
Gets the value of :py:attr:`labels` or its default value.
@@ -1271,6 +1379,8 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
A feature transformer that filters out stop words from input.
Note: null values from input array are preserved unless adding null to stopWords explicitly.
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make the stopwords show up in generated doc
stopWords = Param(Params._dummy(), "stopWords", "The words to be filtered out")
@@ -1297,6 +1407,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inputCol=None, outputCol=None, stopWords=None,
caseSensitive=False):
"""
@@ -1307,6 +1418,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setStopWords(self, value):
"""
Specify the stopwords to be filtered.
@@ -1314,12 +1426,14 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.stopWords] = value
return self
+ @since("1.6.0")
def getStopWords(self):
"""
Get the stopwords.
"""
return self.getOrDefault(self.stopWords)
+ @since("1.6.0")
def setCaseSensitive(self, value):
"""
Set whether to do a case sensitive comparison over the stop words
@@ -1327,6 +1441,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.caseSensitive] = value
return self
+ @since("1.6.0")
def getCaseSensitive(self):
"""
Get whether to do a case sensitive comparison over the stop words.
@@ -1360,6 +1475,8 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
Traceback (most recent call last):
...
TypeError: Method setParams forces keyword arguments.
+
+ .. versionadded:: 1.3.0
"""
@keyword_only
@@ -1373,6 +1490,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.3.0")
def setParams(self, inputCol=None, outputCol=None):
"""
setParams(self, inputCol="input", outputCol="output")
@@ -1398,6 +1516,8 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
>>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"}
>>> vecAssembler.transform(df, params).head().vector
DenseVector([0.0, 1.0])
+
+ .. versionadded:: 1.4.0
"""
@keyword_only
@@ -1411,6 +1531,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, inputCols=None, outputCol=None):
"""
setParams(self, inputCols=None, outputCol=None)
@@ -1477,6 +1598,8 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
>>> model2 = indexer.fit(df, params)
>>> model2.transform(df).head().vector
DenseVector([1.0, 0.0])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -1501,6 +1624,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, maxCategories=20, inputCol=None, outputCol=None):
"""
setParams(self, maxCategories=20, inputCol=None, outputCol=None)
@@ -1509,6 +1633,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setMaxCategories(self, value):
"""
Sets the value of :py:attr:`maxCategories`.
@@ -1516,6 +1641,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.maxCategories] = value
return self
+ @since("1.4.0")
def getMaxCategories(self):
"""
Gets the value of maxCategories or its default value.
@@ -1531,9 +1657,12 @@ class VectorIndexerModel(JavaModel):
.. note:: Experimental
Model fitted by VectorIndexer.
+
+ .. versionadded:: 1.4.0
"""
@property
+ @since("1.4.0")
def numFeatures(self):
"""
Number of features, i.e., length of Vectors which this transforms.
@@ -1541,6 +1670,7 @@ class VectorIndexerModel(JavaModel):
return self._call_java("numFeatures")
@property
+ @since("1.4.0")
def categoryMaps(self):
"""
Feature value index. Keys are categorical feature indices (column indices).
@@ -1573,6 +1703,8 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
>>> vs = VectorSlicer(inputCol="features", outputCol="sliced", indices=[1, 4])
>>> vs.transform(df).head().sliced
DenseVector([2.3, 1.0])
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -1600,6 +1732,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inputCol=None, outputCol=None, indices=None, names=None):
"""
setParams(self, inputCol=None, outputCol=None, indices=None, names=None):
@@ -1608,6 +1741,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setIndices(self, value):
"""
Sets the value of :py:attr:`indices`.
@@ -1615,12 +1749,14 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.indices] = value
return self
+ @since("1.6.0")
def getIndices(self):
"""
Gets the value of indices or its default value.
"""
return self.getOrDefault(self.indices)
+ @since("1.6.0")
def setNames(self, value):
"""
Sets the value of :py:attr:`names`.
@@ -1628,6 +1764,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.names] = value
return self
+ @since("1.6.0")
def getNames(self):
"""
Gets the value of names or its default value.
@@ -1666,6 +1803,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
...
>>> model.transform(doc).head().model
DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -1699,6 +1838,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=None, inputCol=None, outputCol=None):
"""
@@ -1709,6 +1849,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setVectorSize(self, value):
"""
Sets the value of :py:attr:`vectorSize`.
@@ -1716,12 +1857,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self._paramMap[self.vectorSize] = value
return self
+ @since("1.4.0")
def getVectorSize(self):
"""
Gets the value of vectorSize or its default value.
"""
return self.getOrDefault(self.vectorSize)
+ @since("1.4.0")
def setNumPartitions(self, value):
"""
Sets the value of :py:attr:`numPartitions`.
@@ -1729,12 +1872,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self._paramMap[self.numPartitions] = value
return self
+ @since("1.4.0")
def getNumPartitions(self):
"""
Gets the value of numPartitions or its default value.
"""
return self.getOrDefault(self.numPartitions)
+ @since("1.4.0")
def setMinCount(self, value):
"""
Sets the value of :py:attr:`minCount`.
@@ -1742,6 +1887,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self._paramMap[self.minCount] = value
return self
+ @since("1.4.0")
def getMinCount(self):
"""
Gets the value of minCount or its default value.
@@ -1757,8 +1903,11 @@ class Word2VecModel(JavaModel):
.. note:: Experimental
Model fitted by Word2Vec.
+
+ .. versionadded:: 1.4.0
"""
+ @since("1.5.0")
def getVectors(self):
"""
Returns the vector representation of the words as a dataframe
@@ -1766,6 +1915,7 @@ class Word2VecModel(JavaModel):
"""
return self._call_java("getVectors")
+ @since("1.5.0")
def findSynonyms(self, word, num):
"""
Find "num" number of words closest in similarity to "word".
@@ -1794,6 +1944,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
>>> model = pca.fit(df)
>>> model.transform(df).collect()[0].pca_features
DenseVector([1.648..., -4.013...])
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -1811,6 +1963,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, k=None, inputCol=None, outputCol=None):
"""
setParams(self, k=None, inputCol=None, outputCol=None)
@@ -1819,6 +1972,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setK(self, value):
"""
Sets the value of :py:attr:`k`.
@@ -1826,6 +1980,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.k] = value
return self
+ @since("1.5.0")
def getK(self):
"""
Gets the value of k or its default value.
@@ -1841,6 +1996,8 @@ class PCAModel(JavaModel):
.. note:: Experimental
Model fitted by PCA.
+
+ .. versionadded:: 1.5.0
"""
@@ -1879,6 +2036,8 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
|0.0|0.0| a| [0.0]| 0.0|
+---+---+---+--------+-----+
...
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -1896,6 +2055,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, formula=None, featuresCol="features", labelCol="label"):
"""
setParams(self, formula=None, featuresCol="features", labelCol="label")
@@ -1904,6 +2064,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setFormula(self, value):
"""
Sets the value of :py:attr:`formula`.
@@ -1911,6 +2072,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
self._paramMap[self.formula] = value
return self
+ @since("1.5.0")
def getFormula(self):
"""
Gets the value of :py:attr:`formula`.
@@ -1926,6 +2088,8 @@ class RFormulaModel(JavaModel):
.. note:: Experimental
Model fitted by :py:class:`RFormula`.
+
+ .. versionadded:: 1.5.0
"""