[SPARK-10286][ML][PYSPARK][DOCS] Add @since annotation to pyspark.ml.param and pyspark.ml.*

Author: lihao <lihaowhu@gmail.com> Closes #9275 from lidinghao/SPARK-10286.
author: lihao <lihaowhu@gmail.com> 2015-11-02 16:09:22 -0800
committer: Xiangrui Meng <meng@databricks.com> 2015-11-02 16:09:22 -0800
commit: ecfb3e73fd0a99f0be96034710974e78b6f9d624 (patch)
tree: 7a7b2d6d2052aa0ef524ad65ccaab77b4b6289b4 /python/pyspark/ml/feature.py
parent: 2804674a7af8f11eeb1280459bc9145815398eed (diff)
download: spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.gz
spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.bz2
spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.zip
1 files changed, 164 insertions, 0 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 55bde6d0ea..c7b6dd926c 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -19,6 +19,7 @@ import sys
 if sys.version > '3':
     basestring = str
 
+from pyspark import since
 from pyspark.rdd import ignore_unicode_prefix
 from pyspark.ml.param.shared import *
 from pyspark.ml.util import keyword_only
@@ -51,6 +52,8 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
     >>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"}
     >>> binarizer.transform(df, params).head().vector
     1.0
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -71,6 +74,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, threshold=0.0, inputCol=None, outputCol=None):
         """
         setParams(self, threshold=0.0, inputCol=None, outputCol=None)
@@ -79,6 +83,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setThreshold(self, value):
         """
         Sets the value of :py:attr:`threshold`.
@@ -86,6 +91,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.threshold] = value
         return self
 
+    @since("1.4.0")
     def getThreshold(self):
         """
         Gets the value of threshold or its default value.
@@ -114,6 +120,8 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
     2.0
     >>> bucketizer.setParams(outputCol="b").transform(df).head().b
     0.0
+
+    .. versionadded:: 1.3.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -150,6 +158,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, splits=None, inputCol=None, outputCol=None):
         """
         setParams(self, splits=None, inputCol=None, outputCol=None)
@@ -158,6 +167,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setSplits(self, value):
         """
         Sets the value of :py:attr:`splits`.
@@ -165,6 +175,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.splits] = value
         return self
 
+    @since("1.4.0")
     def getSplits(self):
         """
         Gets the value of threshold or its default value.
@@ -194,6 +205,8 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
     ...
     >>> sorted(map(str, model.vocabulary))
     ['a', 'b', 'c']
+
+    .. versionadded:: 1.6.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -242,6 +255,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.6.0")
     def setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None):
         """
         setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None)
@@ -250,6 +264,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.6.0")
     def setMinTF(self, value):
         """
         Sets the value of :py:attr:`minTF`.
@@ -257,12 +272,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.minTF] = value
         return self
 
+    @since("1.6.0")
     def getMinTF(self):
         """
         Gets the value of minTF or its default value.
         """
         return self.getOrDefault(self.minTF)
 
+    @since("1.6.0")
     def setMinDF(self, value):
         """
         Sets the value of :py:attr:`minDF`.
@@ -270,12 +287,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.minDF] = value
         return self
 
+    @since("1.6.0")
     def getMinDF(self):
         """
         Gets the value of minDF or its default value.
         """
         return self.getOrDefault(self.minDF)
 
+    @since("1.6.0")
     def setVocabSize(self, value):
         """
         Sets the value of :py:attr:`vocabSize`.
@@ -283,6 +302,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.vocabSize] = value
         return self
 
+    @since("1.6.0")
     def getVocabSize(self):
         """
         Gets the value of vocabSize or its default value.
@@ -298,9 +318,12 @@ class CountVectorizerModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by CountVectorizer.
+
+    .. versionadded:: 1.6.0
     """
 
     @property
+    @since("1.6.0")
     def vocabulary(self):
         """
         An array of terms in the vocabulary.
@@ -331,6 +354,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
     >>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2)
     >>> df3.head().origVec
     DenseVector([5.0, 8.0, 6.0])
+
+    .. versionadded:: 1.6.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -351,6 +376,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.6.0")
     def setParams(self, inverse=False, inputCol=None, outputCol=None):
         """
         setParams(self, inverse=False, inputCol=None, outputCol=None)
@@ -359,6 +385,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.6.0")
     def setInverse(self, value):
         """
         Sets the value of :py:attr:`inverse`.
@@ -366,6 +393,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.inverse] = value
         return self
 
+    @since("1.6.0")
     def getInverse(self):
         """
         Gets the value of inverse or its default value.
@@ -390,6 +418,8 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
     DenseVector([2.0, 2.0, 9.0])
     >>> ep.setParams(scalingVec=Vectors.dense([2.0, 3.0, 5.0])).transform(df).head().eprod
     DenseVector([4.0, 3.0, 15.0])
+
+    .. versionadded:: 1.5.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -410,6 +440,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.5.0")
     def setParams(self, scalingVec=None, inputCol=None, outputCol=None):
         """
         setParams(self, scalingVec=None, inputCol=None, outputCol=None)
@@ -418,6 +449,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.5.0")
     def setScalingVec(self, value):
         """
         Sets the value of :py:attr:`scalingVec`.
@@ -425,6 +457,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.scalingVec] = value
         return self
 
+    @since("1.5.0")
     def getScalingVec(self):
         """
         Gets the value of scalingVec or its default value.
@@ -449,6 +482,8 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
     >>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"}
     >>> hashingTF.transform(df, params).head().vector
     SparseVector(5, {2: 1.0, 3: 1.0, 4: 1.0})
+
+    .. versionadded:: 1.3.0
     """
 
     @keyword_only
@@ -463,6 +498,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.3.0")
     def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None):
         """
         setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None)
@@ -490,6 +526,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
     >>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"}
     >>> idf.fit(df, params).transform(df).head().vector
     DenseVector([0.2877, 0.0])
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -510,6 +548,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, minDocFreq=0, inputCol=None, outputCol=None):
         """
         setParams(self, minDocFreq=0, inputCol=None, outputCol=None)
@@ -518,6 +557,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setMinDocFreq(self, value):
         """
         Sets the value of :py:attr:`minDocFreq`.
@@ -525,6 +565,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.minDocFreq] = value
         return self
 
+    @since("1.4.0")
     def getMinDocFreq(self):
         """
         Gets the value of minDocFreq or its default value.
@@ -540,6 +581,8 @@ class IDFModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by IDF.
+
+    .. versionadded:: 1.4.0
     """
 
 
@@ -571,6 +614,8 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
     |[2.0]| [1.0]|
     +-----+------+
     ...
+
+    .. versionadded:: 1.6.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -591,6 +636,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.6.0")
     def setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None):
         """
         setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None)
@@ -599,6 +645,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.6.0")
     def setMin(self, value):
         """
         Sets the value of :py:attr:`min`.
@@ -606,12 +653,14 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.min] = value
         return self
 
+    @since("1.6.0")
     def getMin(self):
         """
         Gets the value of min or its default value.
         """
         return self.getOrDefault(self.min)
 
+    @since("1.6.0")
     def setMax(self, value):
         """
         Sets the value of :py:attr:`max`.
@@ -619,6 +668,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.max] = value
         return self
 
+    @since("1.6.0")
     def getMax(self):
         """
         Gets the value of max or its default value.
@@ -634,6 +684,8 @@ class MinMaxScalerModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by :py:class:`MinMaxScaler`.
+
+    .. versionadded:: 1.6.0
     """
 
 
@@ -668,6 +720,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
     Traceback (most recent call last):
         ...
     TypeError: Method setParams forces keyword arguments.
+
+    .. versionadded:: 1.5.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -686,6 +740,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.5.0")
     def setParams(self, n=2, inputCol=None, outputCol=None):
         """
         setParams(self, n=2, inputCol=None, outputCol=None)
@@ -694,6 +749,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.5.0")
     def setN(self, value):
         """
         Sets the value of :py:attr:`n`.
@@ -701,6 +757,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.n] = value
         return self
 
+    @since("1.5.0")
     def getN(self):
         """
         Gets the value of n or its default value.
@@ -726,6 +783,8 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
     >>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"}
     >>> normalizer.transform(df, params).head().vector
     DenseVector([0.4286, -0.5714])
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -744,6 +803,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, p=2.0, inputCol=None, outputCol=None):
         """
         setParams(self, p=2.0, inputCol=None, outputCol=None)
@@ -752,6 +812,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setP(self, value):
         """
         Sets the value of :py:attr:`p`.
@@ -759,6 +820,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.p] = value
         return self
 
+    @since("1.4.0")
     def getP(self):
         """
         Gets the value of p or its default value.
@@ -800,6 +862,8 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
     >>> params = {encoder.dropLast: False, encoder.outputCol: "test"}
     >>> encoder.transform(td, params).head().test
     SparseVector(3, {0: 1.0})
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -818,6 +882,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, dropLast=True, inputCol=None, outputCol=None):
         """
         setParams(self, dropLast=True, inputCol=None, outputCol=None)
@@ -826,6 +891,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setDropLast(self, value):
         """
         Sets the value of :py:attr:`dropLast`.
@@ -833,6 +899,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.dropLast] = value
         return self
 
+    @since("1.4.0")
     def getDropLast(self):
         """
         Gets the value of dropLast or its default value.
@@ -858,6 +925,8 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
     DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
     >>> px.setParams(outputCol="test").transform(df).head().test
     DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -877,6 +946,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, degree=2, inputCol=None, outputCol=None):
         """
         setParams(self, degree=2, inputCol=None, outputCol=None)
@@ -885,6 +955,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setDegree(self, value):
         """
         Sets the value of :py:attr:`degree`.
@@ -892,6 +963,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.degree] = value
         return self
 
+    @since("1.4.0")
     def getDegree(self):
         """
         Gets the value of degree or its default value.
@@ -929,6 +1001,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
     Traceback (most recent call last):
         ...
     TypeError: Method setParams forces keyword arguments.
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -951,6 +1025,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None):
         """
         setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None)
@@ -959,6 +1034,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setMinTokenLength(self, value):
         """
         Sets the value of :py:attr:`minTokenLength`.
@@ -966,12 +1042,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.minTokenLength] = value
         return self
 
+    @since("1.4.0")
     def getMinTokenLength(self):
         """
         Gets the value of minTokenLength or its default value.
         """
         return self.getOrDefault(self.minTokenLength)
 
+    @since("1.4.0")
     def setGaps(self, value):
         """
         Sets the value of :py:attr:`gaps`.
@@ -979,12 +1057,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.gaps] = value
         return self
 
+    @since("1.4.0")
     def getGaps(self):
         """
         Gets the value of gaps or its default value.
         """
         return self.getOrDefault(self.gaps)
 
+    @since("1.4.0")
     def setPattern(self, value):
         """
         Sets the value of :py:attr:`pattern`.
@@ -992,6 +1072,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.pattern] = value
         return self
 
+    @since("1.4.0")
     def getPattern(self):
         """
         Gets the value of pattern or its default value.
@@ -1013,6 +1094,8 @@ class SQLTransformer(JavaTransformer):
     ...     statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
     >>> sqlTrans.transform(df).head()
     Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0)
+
+    .. versionadded:: 1.6.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -1030,6 +1113,7 @@ class SQLTransformer(JavaTransformer):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.6.0")
     def setParams(self, statement=None):
         """
         setParams(self, statement=None)
@@ -1038,6 +1122,7 @@ class SQLTransformer(JavaTransformer):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.6.0")
     def setStatement(self, value):
         """
         Sets the value of :py:attr:`statement`.
@@ -1045,6 +1130,7 @@ class SQLTransformer(JavaTransformer):
         self._paramMap[self.statement] = value
         return self
 
+    @since("1.6.0")
     def getStatement(self):
         """
         Gets the value of statement or its default value.
@@ -1070,6 +1156,8 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
     DenseVector([1.4142])
     >>> model.transform(df).collect()[1].scaled
     DenseVector([1.4142])
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -1090,6 +1178,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None):
         """
         setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None)
@@ -1098,6 +1187,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setWithMean(self, value):
         """
         Sets the value of :py:attr:`withMean`.
@@ -1105,12 +1195,14 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.withMean] = value
         return self
 
+    @since("1.4.0")
     def getWithMean(self):
         """
         Gets the value of withMean or its default value.
         """
         return self.getOrDefault(self.withMean)
 
+    @since("1.4.0")
     def setWithStd(self, value):
         """
         Sets the value of :py:attr:`withStd`.
@@ -1118,6 +1210,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.withStd] = value
         return self
 
+    @since("1.4.0")
     def getWithStd(self):
         """
         Gets the value of withStd or its default value.
@@ -1133,9 +1226,12 @@ class StandardScalerModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by StandardScaler.
+
+    .. versionadded:: 1.4.0
     """
 
     @property
+    @since("1.5.0")
     def std(self):
         """
         Standard deviation of the StandardScalerModel.
@@ -1143,6 +1239,7 @@ class StandardScalerModel(JavaModel):
         return self._call_java("std")
 
     @property
+    @since("1.5.0")
     def mean(self):
         """
         Mean of the StandardScalerModel.
@@ -1171,6 +1268,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid):
     >>> sorted(set([(i[0], str(i[1])) for i in itd.select(itd.id, itd.label2).collect()]),
     ...     key=lambda x: x[0])
     [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]
+
+    .. versionadded:: 1.4.0
     """
 
     @keyword_only
@@ -1185,6 +1284,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, inputCol=None, outputCol=None, handleInvalid="error"):
         """
         setParams(self, inputCol=None, outputCol=None, handleInvalid="error")
@@ -1202,8 +1302,11 @@ class StringIndexerModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by StringIndexer.
+
+    .. versionadded:: 1.4.0
     """
     @property
+    @since("1.5.0")
     def labels(self):
         """
         Ordered list of labels, corresponding to indices to be assigned.
@@ -1221,6 +1324,8 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
     The index-string mapping is either from the ML attributes of the input column,
     or from user-supplied labels (which take precedence over ML attributes).
     See L{StringIndexer} for converting strings into indices.
+
+    .. versionadded:: 1.6.0
     """
 
     # a placeholder to make the labels show up in generated doc
@@ -1243,6 +1348,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.6.0")
     def setParams(self, inputCol=None, outputCol=None, labels=None):
         """
         setParams(self, inputCol=None, outputCol=None, labels=None)
@@ -1251,6 +1357,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.6.0")
     def setLabels(self, value):
         """
         Sets the value of :py:attr:`labels`.
@@ -1258,6 +1365,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.labels] = value
         return self
 
+    @since("1.6.0")
     def getLabels(self):
         """
         Gets the value of :py:attr:`labels` or its default value.
@@ -1271,6 +1379,8 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
 
     A feature transformer that filters out stop words from input.
     Note: null values from input array are preserved unless adding null to stopWords explicitly.
+
+    .. versionadded:: 1.6.0
     """
     # a placeholder to make the stopwords show up in generated doc
     stopWords = Param(Params._dummy(), "stopWords", "The words to be filtered out")
@@ -1297,6 +1407,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.6.0")
     def setParams(self, inputCol=None, outputCol=None, stopWords=None,
                   caseSensitive=False):
         """
@@ -1307,6 +1418,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.6.0")
     def setStopWords(self, value):
         """
         Specify the stopwords to be filtered.
@@ -1314,12 +1426,14 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.stopWords] = value
         return self
 
+    @since("1.6.0")
     def getStopWords(self):
         """
         Get the stopwords.
         """
         return self.getOrDefault(self.stopWords)
 
+    @since("1.6.0")
     def setCaseSensitive(self, value):
         """
         Set whether to do a case sensitive comparison over the stop words
@@ -1327,6 +1441,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.caseSensitive] = value
         return self
 
+    @since("1.6.0")
     def getCaseSensitive(self):
         """
         Get whether to do a case sensitive comparison over the stop words.
@@ -1360,6 +1475,8 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
     Traceback (most recent call last):
         ...
     TypeError: Method setParams forces keyword arguments.
+
+    .. versionadded:: 1.3.0
     """
 
     @keyword_only
@@ -1373,6 +1490,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.3.0")
     def setParams(self, inputCol=None, outputCol=None):
         """
         setParams(self, inputCol="input", outputCol="output")
@@ -1398,6 +1516,8 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
     >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"}
     >>> vecAssembler.transform(df, params).head().vector
     DenseVector([0.0, 1.0])
+
+    .. versionadded:: 1.4.0
     """
 
     @keyword_only
@@ -1411,6 +1531,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, inputCols=None, outputCol=None):
         """
         setParams(self, inputCols=None, outputCol=None)
@@ -1477,6 +1598,8 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
     >>> model2 = indexer.fit(df, params)
     >>> model2.transform(df).head().vector
     DenseVector([1.0, 0.0])
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -1501,6 +1624,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, maxCategories=20, inputCol=None, outputCol=None):
         """
         setParams(self, maxCategories=20, inputCol=None, outputCol=None)
@@ -1509,6 +1633,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setMaxCategories(self, value):
         """
         Sets the value of :py:attr:`maxCategories`.
@@ -1516,6 +1641,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.maxCategories] = value
         return self
 
+    @since("1.4.0")
     def getMaxCategories(self):
         """
         Gets the value of maxCategories or its default value.
@@ -1531,9 +1657,12 @@ class VectorIndexerModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by VectorIndexer.
+
+    .. versionadded:: 1.4.0
     """
 
     @property
+    @since("1.4.0")
     def numFeatures(self):
         """
         Number of features, i.e., length of Vectors which this transforms.
@@ -1541,6 +1670,7 @@ class VectorIndexerModel(JavaModel):
         return self._call_java("numFeatures")
 
     @property
+    @since("1.4.0")
     def categoryMaps(self):
         """
         Feature value index.  Keys are categorical feature indices (column indices).
@@ -1573,6 +1703,8 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
     >>> vs = VectorSlicer(inputCol="features", outputCol="sliced", indices=[1, 4])
     >>> vs.transform(df).head().sliced
     DenseVector([2.3, 1.0])
+
+    .. versionadded:: 1.6.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -1600,6 +1732,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.6.0")
     def setParams(self, inputCol=None, outputCol=None, indices=None, names=None):
         """
         setParams(self, inputCol=None, outputCol=None, indices=None, names=None):
@@ -1608,6 +1741,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.6.0")
     def setIndices(self, value):
         """
         Sets the value of :py:attr:`indices`.
@@ -1615,12 +1749,14 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.indices] = value
         return self
 
+    @since("1.6.0")
     def getIndices(self):
         """
         Gets the value of indices or its default value.
         """
         return self.getOrDefault(self.indices)
 
+    @since("1.6.0")
     def setNames(self, value):
         """
         Sets the value of :py:attr:`names`.
@@ -1628,6 +1764,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
         self._paramMap[self.names] = value
         return self
 
+    @since("1.6.0")
     def getNames(self):
         """
         Gets the value of names or its default value.
@@ -1666,6 +1803,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
     ...
     >>> model.transform(doc).head().model
     DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276])
+
+    .. versionadded:: 1.4.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -1699,6 +1838,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.4.0")
     def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
                   seed=None, inputCol=None, outputCol=None):
         """
@@ -1709,6 +1849,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.4.0")
     def setVectorSize(self, value):
         """
         Sets the value of :py:attr:`vectorSize`.
@@ -1716,12 +1857,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
         self._paramMap[self.vectorSize] = value
         return self
 
+    @since("1.4.0")
     def getVectorSize(self):
         """
         Gets the value of vectorSize or its default value.
         """
         return self.getOrDefault(self.vectorSize)
 
+    @since("1.4.0")
     def setNumPartitions(self, value):
         """
         Sets the value of :py:attr:`numPartitions`.
@@ -1729,12 +1872,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
         self._paramMap[self.numPartitions] = value
         return self
 
+    @since("1.4.0")
     def getNumPartitions(self):
         """
         Gets the value of numPartitions or its default value.
         """
         return self.getOrDefault(self.numPartitions)
 
+    @since("1.4.0")
     def setMinCount(self, value):
         """
         Sets the value of :py:attr:`minCount`.
@@ -1742,6 +1887,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
         self._paramMap[self.minCount] = value
         return self
 
+    @since("1.4.0")
     def getMinCount(self):
         """
         Gets the value of minCount or its default value.
@@ -1757,8 +1903,11 @@ class Word2VecModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by Word2Vec.
+
+    .. versionadded:: 1.4.0
     """
 
+    @since("1.5.0")
     def getVectors(self):
         """
         Returns the vector representation of the words as a dataframe
@@ -1766,6 +1915,7 @@ class Word2VecModel(JavaModel):
         """
         return self._call_java("getVectors")
 
+    @since("1.5.0")
     def findSynonyms(self, word, num):
         """
         Find "num" number of words closest in similarity to "word".
@@ -1794,6 +1944,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
     >>> model = pca.fit(df)
     >>> model.transform(df).collect()[0].pca_features
     DenseVector([1.648..., -4.013...])
+
+    .. versionadded:: 1.5.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -1811,6 +1963,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.5.0")
     def setParams(self, k=None, inputCol=None, outputCol=None):
         """
         setParams(self, k=None, inputCol=None, outputCol=None)
@@ -1819,6 +1972,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.5.0")
     def setK(self, value):
         """
         Sets the value of :py:attr:`k`.
@@ -1826,6 +1980,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
         self._paramMap[self.k] = value
         return self
 
+    @since("1.5.0")
     def getK(self):
         """
         Gets the value of k or its default value.
@@ -1841,6 +1996,8 @@ class PCAModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by PCA.
+
+    .. versionadded:: 1.5.0
     """
 
 
@@ -1879,6 +2036,8 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
     |0.0|0.0|  a|   [0.0]|  0.0|
     +---+---+---+--------+-----+
     ...
+
+    .. versionadded:: 1.5.0
     """
 
     # a placeholder to make it appear in the generated doc
@@ -1896,6 +2055,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
         self.setParams(**kwargs)
 
     @keyword_only
+    @since("1.5.0")
     def setParams(self, formula=None, featuresCol="features", labelCol="label"):
         """
         setParams(self, formula=None, featuresCol="features", labelCol="label")
@@ -1904,6 +2064,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
+    @since("1.5.0")
     def setFormula(self, value):
         """
         Sets the value of :py:attr:`formula`.
@@ -1911,6 +2072,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
         self._paramMap[self.formula] = value
         return self
 
+    @since("1.5.0")
     def getFormula(self):
         """
         Gets the value of :py:attr:`formula`.
@@ -1926,6 +2088,8 @@ class RFormulaModel(JavaModel):
     .. note:: Experimental
 
     Model fitted by :py:class:`RFormula`.
+
+    .. versionadded:: 1.5.0
     """
author	lihao <lihaowhu@gmail.com>	2015-11-02 16:09:22 -0800
committer	Xiangrui Meng <meng@databricks.com>	2015-11-02 16:09:22 -0800
commit	ecfb3e73fd0a99f0be96034710974e78b6f9d624 (patch)
tree	7a7b2d6d2052aa0ef524ad65ccaab77b4b6289b4 /python/pyspark/ml/feature.py
parent	2804674a7af8f11eeb1280459bc9145815398eed (diff)
download	spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.gz spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.bz2 spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.zip