about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
author	lihao <lihaowhu@gmail.com>	2015-11-02 16:09:22 -0800
committer	Xiangrui Meng <meng@databricks.com>	2015-11-02 16:09:22 -0800
commit	ecfb3e73fd0a99f0be96034710974e78b6f9d624 (patch)
tree	7a7b2d6d2052aa0ef524ad65ccaab77b4b6289b4 /python
parent	2804674a7af8f11eeb1280459bc9145815398eed (diff)
download	spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.gz
spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.tar.bz2
spark-ecfb3e73fd0a99f0be96034710974e78b6f9d624.zip
[SPARK-10286][ML][PYSPARK][DOCS] Add @since annotation to pyspark.ml.param and pyspark.ml.*
Author: lihao <lihaowhu@gmail.com>

Closes #9275 from lidinghao/SPARK-10286.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/ml/evaluation.py20
-rw-r--r--python/pyspark/ml/feature.py164
-rw-r--r--python/pyspark/ml/param/__init__.py16
-rw-r--r--python/pyspark/ml/pipeline.py30
4 files changed, 230 insertions, 0 deletions
diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py
index cb3b07947e..dcc1738ec5 100644
--- a/python/pyspark/ml/evaluation.py
+++ b/python/pyspark/ml/evaluation.py
@@ -17,6 +17,7 @@
from abc import abstractmethod, ABCMeta
+from pyspark import since
from pyspark.ml.wrapper import JavaWrapper
from pyspark.ml.param import Param, Params
from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol
@@ -31,6 +32,8 @@ __all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator',
class Evaluator(Params):
"""
Base class for evaluators that compute metrics from predictions.
+
+ .. versionadded:: 1.4.0
"""
__metaclass__ = ABCMeta
@@ -46,6 +49,7 @@ class Evaluator(Params):
"""
raise NotImplementedError()
+ @since("1.4.0")
def evaluate(self, dataset, params=None):
"""
Evaluates the output with optional parameters.
@@ -66,6 +70,7 @@ class Evaluator(Params):
else:
raise ValueError("Params must be a param map but got %s." % type(params))
+ @since("1.5.0")
def isLargerBetter(self):
"""
Indicates whether the metric returned by :py:meth:`evaluate` should be maximized
@@ -114,6 +119,8 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
0.70...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
0.83...
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -138,6 +145,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
kwargs = self.__init__._input_kwargs
self._set(**kwargs)
+ @since("1.4.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
@@ -145,6 +153,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
self._paramMap[self.metricName] = value
return self
+ @since("1.4.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
@@ -152,6 +161,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
return self.getOrDefault(self.metricName)
@keyword_only
+ @since("1.4.0")
def setParams(self, rawPredictionCol="rawPrediction", labelCol="label",
metricName="areaUnderROC"):
"""
@@ -180,6 +190,8 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
0.993...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"})
2.649...
+
+ .. versionadded:: 1.4.0
"""
# Because we will maximize evaluation value (ref: `CrossValidator`),
# when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`),
@@ -205,6 +217,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
kwargs = self.__init__._input_kwargs
self._set(**kwargs)
+ @since("1.4.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
@@ -212,6 +225,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
self._paramMap[self.metricName] = value
return self
+ @since("1.4.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
@@ -219,6 +233,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
return self.getOrDefault(self.metricName)
@keyword_only
+ @since("1.4.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="rmse"):
"""
@@ -246,6 +261,8 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
0.66...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "recall"})
0.66...
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
metricName = Param(Params._dummy(), "metricName",
@@ -271,6 +288,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
kwargs = self.__init__._input_kwargs
self._set(**kwargs)
+ @since("1.5.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
@@ -278,6 +296,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
self._paramMap[self.metricName] = value
return self
+ @since("1.5.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
@@ -285,6 +304,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
return self.getOrDefault(self.metricName)
@keyword_only
+ @since("1.5.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="f1"):
"""
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 55bde6d0ea..c7b6dd926c 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -19,6 +19,7 @@ import sys
if sys.version > '3':
basestring = str
+from pyspark import since
from pyspark.rdd import ignore_unicode_prefix
from pyspark.ml.param.shared import *
from pyspark.ml.util import keyword_only
@@ -51,6 +52,8 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
>>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"}
>>> binarizer.transform(df, params).head().vector
1.0
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -71,6 +74,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, threshold=0.0, inputCol=None, outputCol=None):
"""
setParams(self, threshold=0.0, inputCol=None, outputCol=None)
@@ -79,6 +83,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setThreshold(self, value):
"""
Sets the value of :py:attr:`threshold`.
@@ -86,6 +91,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.threshold] = value
return self
+ @since("1.4.0")
def getThreshold(self):
"""
Gets the value of threshold or its default value.
@@ -114,6 +120,8 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
2.0
>>> bucketizer.setParams(outputCol="b").transform(df).head().b
0.0
+
+ .. versionadded:: 1.3.0
"""
# a placeholder to make it appear in the generated doc
@@ -150,6 +158,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, splits=None, inputCol=None, outputCol=None):
"""
setParams(self, splits=None, inputCol=None, outputCol=None)
@@ -158,6 +167,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setSplits(self, value):
"""
Sets the value of :py:attr:`splits`.
@@ -165,6 +175,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.splits] = value
return self
+ @since("1.4.0")
def getSplits(self):
"""
Gets the value of threshold or its default value.
@@ -194,6 +205,8 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
...
>>> sorted(map(str, model.vocabulary))
['a', 'b', 'c']
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -242,6 +255,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None):
"""
setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None)
@@ -250,6 +264,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setMinTF(self, value):
"""
Sets the value of :py:attr:`minTF`.
@@ -257,12 +272,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.minTF] = value
return self
+ @since("1.6.0")
def getMinTF(self):
"""
Gets the value of minTF or its default value.
"""
return self.getOrDefault(self.minTF)
+ @since("1.6.0")
def setMinDF(self, value):
"""
Sets the value of :py:attr:`minDF`.
@@ -270,12 +287,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.minDF] = value
return self
+ @since("1.6.0")
def getMinDF(self):
"""
Gets the value of minDF or its default value.
"""
return self.getOrDefault(self.minDF)
+ @since("1.6.0")
def setVocabSize(self, value):
"""
Sets the value of :py:attr:`vocabSize`.
@@ -283,6 +302,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.vocabSize] = value
return self
+ @since("1.6.0")
def getVocabSize(self):
"""
Gets the value of vocabSize or its default value.
@@ -298,9 +318,12 @@ class CountVectorizerModel(JavaModel):
.. note:: Experimental
Model fitted by CountVectorizer.
+
+ .. versionadded:: 1.6.0
"""
@property
+ @since("1.6.0")
def vocabulary(self):
"""
An array of terms in the vocabulary.
@@ -331,6 +354,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
>>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2)
>>> df3.head().origVec
DenseVector([5.0, 8.0, 6.0])
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -351,6 +376,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inverse=False, inputCol=None, outputCol=None):
"""
setParams(self, inverse=False, inputCol=None, outputCol=None)
@@ -359,6 +385,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setInverse(self, value):
"""
Sets the value of :py:attr:`inverse`.
@@ -366,6 +393,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.inverse] = value
return self
+ @since("1.6.0")
def getInverse(self):
"""
Gets the value of inverse or its default value.
@@ -390,6 +418,8 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
DenseVector([2.0, 2.0, 9.0])
>>> ep.setParams(scalingVec=Vectors.dense([2.0, 3.0, 5.0])).transform(df).head().eprod
DenseVector([4.0, 3.0, 15.0])
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -410,6 +440,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, scalingVec=None, inputCol=None, outputCol=None):
"""
setParams(self, scalingVec=None, inputCol=None, outputCol=None)
@@ -418,6 +449,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setScalingVec(self, value):
"""
Sets the value of :py:attr:`scalingVec`.
@@ -425,6 +457,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.scalingVec] = value
return self
+ @since("1.5.0")
def getScalingVec(self):
"""
Gets the value of scalingVec or its default value.
@@ -449,6 +482,8 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
>>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"}
>>> hashingTF.transform(df, params).head().vector
SparseVector(5, {2: 1.0, 3: 1.0, 4: 1.0})
+
+ .. versionadded:: 1.3.0
"""
@keyword_only
@@ -463,6 +498,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
self.setParams(**kwargs)
@keyword_only
+ @since("1.3.0")
def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None):
"""
setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None)
@@ -490,6 +526,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
>>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"}
>>> idf.fit(df, params).transform(df).head().vector
DenseVector([0.2877, 0.0])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -510,6 +548,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, minDocFreq=0, inputCol=None, outputCol=None):
"""
setParams(self, minDocFreq=0, inputCol=None, outputCol=None)
@@ -518,6 +557,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setMinDocFreq(self, value):
"""
Sets the value of :py:attr:`minDocFreq`.
@@ -525,6 +565,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.minDocFreq] = value
return self
+ @since("1.4.0")
def getMinDocFreq(self):
"""
Gets the value of minDocFreq or its default value.
@@ -540,6 +581,8 @@ class IDFModel(JavaModel):
.. note:: Experimental
Model fitted by IDF.
+
+ .. versionadded:: 1.4.0
"""
@@ -571,6 +614,8 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
|[2.0]| [1.0]|
+-----+------+
...
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -591,6 +636,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None):
"""
setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None)
@@ -599,6 +645,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setMin(self, value):
"""
Sets the value of :py:attr:`min`.
@@ -606,12 +653,14 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.min] = value
return self
+ @since("1.6.0")
def getMin(self):
"""
Gets the value of min or its default value.
"""
return self.getOrDefault(self.min)
+ @since("1.6.0")
def setMax(self, value):
"""
Sets the value of :py:attr:`max`.
@@ -619,6 +668,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.max] = value
return self
+ @since("1.6.0")
def getMax(self):
"""
Gets the value of max or its default value.
@@ -634,6 +684,8 @@ class MinMaxScalerModel(JavaModel):
.. note:: Experimental
Model fitted by :py:class:`MinMaxScaler`.
+
+ .. versionadded:: 1.6.0
"""
@@ -668,6 +720,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
Traceback (most recent call last):
...
TypeError: Method setParams forces keyword arguments.
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -686,6 +740,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, n=2, inputCol=None, outputCol=None):
"""
setParams(self, n=2, inputCol=None, outputCol=None)
@@ -694,6 +749,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setN(self, value):
"""
Sets the value of :py:attr:`n`.
@@ -701,6 +757,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.n] = value
return self
+ @since("1.5.0")
def getN(self):
"""
Gets the value of n or its default value.
@@ -726,6 +783,8 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
>>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"}
>>> normalizer.transform(df, params).head().vector
DenseVector([0.4286, -0.5714])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -744,6 +803,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, p=2.0, inputCol=None, outputCol=None):
"""
setParams(self, p=2.0, inputCol=None, outputCol=None)
@@ -752,6 +812,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setP(self, value):
"""
Sets the value of :py:attr:`p`.
@@ -759,6 +820,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.p] = value
return self
+ @since("1.4.0")
def getP(self):
"""
Gets the value of p or its default value.
@@ -800,6 +862,8 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
>>> params = {encoder.dropLast: False, encoder.outputCol: "test"}
>>> encoder.transform(td, params).head().test
SparseVector(3, {0: 1.0})
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -818,6 +882,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, dropLast=True, inputCol=None, outputCol=None):
"""
setParams(self, dropLast=True, inputCol=None, outputCol=None)
@@ -826,6 +891,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setDropLast(self, value):
"""
Sets the value of :py:attr:`dropLast`.
@@ -833,6 +899,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.dropLast] = value
return self
+ @since("1.4.0")
def getDropLast(self):
"""
Gets the value of dropLast or its default value.
@@ -858,6 +925,8 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
>>> px.setParams(outputCol="test").transform(df).head().test
DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -877,6 +946,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, degree=2, inputCol=None, outputCol=None):
"""
setParams(self, degree=2, inputCol=None, outputCol=None)
@@ -885,6 +955,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setDegree(self, value):
"""
Sets the value of :py:attr:`degree`.
@@ -892,6 +963,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.degree] = value
return self
+ @since("1.4.0")
def getDegree(self):
"""
Gets the value of degree or its default value.
@@ -929,6 +1001,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
Traceback (most recent call last):
...
TypeError: Method setParams forces keyword arguments.
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -951,6 +1025,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None):
"""
setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None)
@@ -959,6 +1034,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setMinTokenLength(self, value):
"""
Sets the value of :py:attr:`minTokenLength`.
@@ -966,12 +1042,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.minTokenLength] = value
return self
+ @since("1.4.0")
def getMinTokenLength(self):
"""
Gets the value of minTokenLength or its default value.
"""
return self.getOrDefault(self.minTokenLength)
+ @since("1.4.0")
def setGaps(self, value):
"""
Sets the value of :py:attr:`gaps`.
@@ -979,12 +1057,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.gaps] = value
return self
+ @since("1.4.0")
def getGaps(self):
"""
Gets the value of gaps or its default value.
"""
return self.getOrDefault(self.gaps)
+ @since("1.4.0")
def setPattern(self, value):
"""
Sets the value of :py:attr:`pattern`.
@@ -992,6 +1072,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.pattern] = value
return self
+ @since("1.4.0")
def getPattern(self):
"""
Gets the value of pattern or its default value.
@@ -1013,6 +1094,8 @@ class SQLTransformer(JavaTransformer):
... statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
>>> sqlTrans.transform(df).head()
Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0)
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -1030,6 +1113,7 @@ class SQLTransformer(JavaTransformer):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, statement=None):
"""
setParams(self, statement=None)
@@ -1038,6 +1122,7 @@ class SQLTransformer(JavaTransformer):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setStatement(self, value):
"""
Sets the value of :py:attr:`statement`.
@@ -1045,6 +1130,7 @@ class SQLTransformer(JavaTransformer):
self._paramMap[self.statement] = value
return self
+ @since("1.6.0")
def getStatement(self):
"""
Gets the value of statement or its default value.
@@ -1070,6 +1156,8 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
DenseVector([1.4142])
>>> model.transform(df).collect()[1].scaled
DenseVector([1.4142])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -1090,6 +1178,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None):
"""
setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None)
@@ -1098,6 +1187,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setWithMean(self, value):
"""
Sets the value of :py:attr:`withMean`.
@@ -1105,12 +1195,14 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.withMean] = value
return self
+ @since("1.4.0")
def getWithMean(self):
"""
Gets the value of withMean or its default value.
"""
return self.getOrDefault(self.withMean)
+ @since("1.4.0")
def setWithStd(self, value):
"""
Sets the value of :py:attr:`withStd`.
@@ -1118,6 +1210,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.withStd] = value
return self
+ @since("1.4.0")
def getWithStd(self):
"""
Gets the value of withStd or its default value.
@@ -1133,9 +1226,12 @@ class StandardScalerModel(JavaModel):
.. note:: Experimental
Model fitted by StandardScaler.
+
+ .. versionadded:: 1.4.0
"""
@property
+ @since("1.5.0")
def std(self):
"""
Standard deviation of the StandardScalerModel.
@@ -1143,6 +1239,7 @@ class StandardScalerModel(JavaModel):
return self._call_java("std")
@property
+ @since("1.5.0")
def mean(self):
"""
Mean of the StandardScalerModel.
@@ -1171,6 +1268,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid):
>>> sorted(set([(i[0], str(i[1])) for i in itd.select(itd.id, itd.label2).collect()]),
... key=lambda x: x[0])
[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]
+
+ .. versionadded:: 1.4.0
"""
@keyword_only
@@ -1185,6 +1284,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, inputCol=None, outputCol=None, handleInvalid="error"):
"""
setParams(self, inputCol=None, outputCol=None, handleInvalid="error")
@@ -1202,8 +1302,11 @@ class StringIndexerModel(JavaModel):
.. note:: Experimental
Model fitted by StringIndexer.
+
+ .. versionadded:: 1.4.0
"""
@property
+ @since("1.5.0")
def labels(self):
"""
Ordered list of labels, corresponding to indices to be assigned.
@@ -1221,6 +1324,8 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
The index-string mapping is either from the ML attributes of the input column,
or from user-supplied labels (which take precedence over ML attributes).
See L{StringIndexer} for converting strings into indices.
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make the labels show up in generated doc
@@ -1243,6 +1348,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inputCol=None, outputCol=None, labels=None):
"""
setParams(self, inputCol=None, outputCol=None, labels=None)
@@ -1251,6 +1357,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setLabels(self, value):
"""
Sets the value of :py:attr:`labels`.
@@ -1258,6 +1365,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.labels] = value
return self
+ @since("1.6.0")
def getLabels(self):
"""
Gets the value of :py:attr:`labels` or its default value.
@@ -1271,6 +1379,8 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
A feature transformer that filters out stop words from input.
Note: null values from input array are preserved unless adding null to stopWords explicitly.
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make the stopwords show up in generated doc
stopWords = Param(Params._dummy(), "stopWords", "The words to be filtered out")
@@ -1297,6 +1407,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inputCol=None, outputCol=None, stopWords=None,
caseSensitive=False):
"""
@@ -1307,6 +1418,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setStopWords(self, value):
"""
Specify the stopwords to be filtered.
@@ -1314,12 +1426,14 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.stopWords] = value
return self
+ @since("1.6.0")
def getStopWords(self):
"""
Get the stopwords.
"""
return self.getOrDefault(self.stopWords)
+ @since("1.6.0")
def setCaseSensitive(self, value):
"""
Set whether to do a case sensitive comparison over the stop words
@@ -1327,6 +1441,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.caseSensitive] = value
return self
+ @since("1.6.0")
def getCaseSensitive(self):
"""
Get whether to do a case sensitive comparison over the stop words.
@@ -1360,6 +1475,8 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
Traceback (most recent call last):
...
TypeError: Method setParams forces keyword arguments.
+
+ .. versionadded:: 1.3.0
"""
@keyword_only
@@ -1373,6 +1490,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.3.0")
def setParams(self, inputCol=None, outputCol=None):
"""
setParams(self, inputCol="input", outputCol="output")
@@ -1398,6 +1516,8 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
>>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"}
>>> vecAssembler.transform(df, params).head().vector
DenseVector([0.0, 1.0])
+
+ .. versionadded:: 1.4.0
"""
@keyword_only
@@ -1411,6 +1531,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, inputCols=None, outputCol=None):
"""
setParams(self, inputCols=None, outputCol=None)
@@ -1477,6 +1598,8 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
>>> model2 = indexer.fit(df, params)
>>> model2.transform(df).head().vector
DenseVector([1.0, 0.0])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -1501,6 +1624,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, maxCategories=20, inputCol=None, outputCol=None):
"""
setParams(self, maxCategories=20, inputCol=None, outputCol=None)
@@ -1509,6 +1633,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setMaxCategories(self, value):
"""
Sets the value of :py:attr:`maxCategories`.
@@ -1516,6 +1641,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.maxCategories] = value
return self
+ @since("1.4.0")
def getMaxCategories(self):
"""
Gets the value of maxCategories or its default value.
@@ -1531,9 +1657,12 @@ class VectorIndexerModel(JavaModel):
.. note:: Experimental
Model fitted by VectorIndexer.
+
+ .. versionadded:: 1.4.0
"""
@property
+ @since("1.4.0")
def numFeatures(self):
"""
Number of features, i.e., length of Vectors which this transforms.
@@ -1541,6 +1670,7 @@ class VectorIndexerModel(JavaModel):
return self._call_java("numFeatures")
@property
+ @since("1.4.0")
def categoryMaps(self):
"""
Feature value index. Keys are categorical feature indices (column indices).
@@ -1573,6 +1703,8 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
>>> vs = VectorSlicer(inputCol="features", outputCol="sliced", indices=[1, 4])
>>> vs.transform(df).head().sliced
DenseVector([2.3, 1.0])
+
+ .. versionadded:: 1.6.0
"""
# a placeholder to make it appear in the generated doc
@@ -1600,6 +1732,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.6.0")
def setParams(self, inputCol=None, outputCol=None, indices=None, names=None):
"""
setParams(self, inputCol=None, outputCol=None, indices=None, names=None):
@@ -1608,6 +1741,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.6.0")
def setIndices(self, value):
"""
Sets the value of :py:attr:`indices`.
@@ -1615,12 +1749,14 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.indices] = value
return self
+ @since("1.6.0")
def getIndices(self):
"""
Gets the value of indices or its default value.
"""
return self.getOrDefault(self.indices)
+ @since("1.6.0")
def setNames(self, value):
"""
Sets the value of :py:attr:`names`.
@@ -1628,6 +1764,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol):
self._paramMap[self.names] = value
return self
+ @since("1.6.0")
def getNames(self):
"""
Gets the value of names or its default value.
@@ -1666,6 +1803,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
...
>>> model.transform(doc).head().model
DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276])
+
+ .. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
@@ -1699,6 +1838,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self.setParams(**kwargs)
@keyword_only
+ @since("1.4.0")
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=None, inputCol=None, outputCol=None):
"""
@@ -1709,6 +1849,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.4.0")
def setVectorSize(self, value):
"""
Sets the value of :py:attr:`vectorSize`.
@@ -1716,12 +1857,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self._paramMap[self.vectorSize] = value
return self
+ @since("1.4.0")
def getVectorSize(self):
"""
Gets the value of vectorSize or its default value.
"""
return self.getOrDefault(self.vectorSize)
+ @since("1.4.0")
def setNumPartitions(self, value):
"""
Sets the value of :py:attr:`numPartitions`.
@@ -1729,12 +1872,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self._paramMap[self.numPartitions] = value
return self
+ @since("1.4.0")
def getNumPartitions(self):
"""
Gets the value of numPartitions or its default value.
"""
return self.getOrDefault(self.numPartitions)
+ @since("1.4.0")
def setMinCount(self, value):
"""
Sets the value of :py:attr:`minCount`.
@@ -1742,6 +1887,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self._paramMap[self.minCount] = value
return self
+ @since("1.4.0")
def getMinCount(self):
"""
Gets the value of minCount or its default value.
@@ -1757,8 +1903,11 @@ class Word2VecModel(JavaModel):
.. note:: Experimental
Model fitted by Word2Vec.
+
+ .. versionadded:: 1.4.0
"""
+ @since("1.5.0")
def getVectors(self):
"""
Returns the vector representation of the words as a dataframe
@@ -1766,6 +1915,7 @@ class Word2VecModel(JavaModel):
"""
return self._call_java("getVectors")
+ @since("1.5.0")
def findSynonyms(self, word, num):
"""
Find "num" number of words closest in similarity to "word".
@@ -1794,6 +1944,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
>>> model = pca.fit(df)
>>> model.transform(df).collect()[0].pca_features
DenseVector([1.648..., -4.013...])
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -1811,6 +1963,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, k=None, inputCol=None, outputCol=None):
"""
setParams(self, k=None, inputCol=None, outputCol=None)
@@ -1819,6 +1972,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setK(self, value):
"""
Sets the value of :py:attr:`k`.
@@ -1826,6 +1980,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
self._paramMap[self.k] = value
return self
+ @since("1.5.0")
def getK(self):
"""
Gets the value of k or its default value.
@@ -1841,6 +1996,8 @@ class PCAModel(JavaModel):
.. note:: Experimental
Model fitted by PCA.
+
+ .. versionadded:: 1.5.0
"""
@@ -1879,6 +2036,8 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
|0.0|0.0| a| [0.0]| 0.0|
+---+---+---+--------+-----+
...
+
+ .. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
@@ -1896,6 +2055,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
self.setParams(**kwargs)
@keyword_only
+ @since("1.5.0")
def setParams(self, formula=None, featuresCol="features", labelCol="label"):
"""
setParams(self, formula=None, featuresCol="features", labelCol="label")
@@ -1904,6 +2064,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
kwargs = self.setParams._input_kwargs
return self._set(**kwargs)
+ @since("1.5.0")
def setFormula(self, value):
"""
Sets the value of :py:attr:`formula`.
@@ -1911,6 +2072,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
self._paramMap[self.formula] = value
return self
+ @since("1.5.0")
def getFormula(self):
"""
Gets the value of :py:attr:`formula`.
@@ -1926,6 +2088,8 @@ class RFormulaModel(JavaModel):
.. note:: Experimental
Model fitted by :py:class:`RFormula`.
+
+ .. versionadded:: 1.5.0
"""
diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py
index 2e0c63cb47..35c9b776a3 100644
--- a/python/pyspark/ml/param/__init__.py
+++ b/python/pyspark/ml/param/__init__.py
@@ -18,6 +18,7 @@
from abc import ABCMeta
import copy
+from pyspark import since
from pyspark.ml.util import Identifiable
@@ -27,6 +28,8 @@ __all__ = ['Param', 'Params']
class Param(object):
"""
A param with self-contained documentation.
+
+ .. versionadded:: 1.3.0
"""
def __init__(self, parent, name, doc):
@@ -56,6 +59,8 @@ class Params(Identifiable):
"""
Components that take parameters. This also provides an internal
param map to store parameter values attached to the instance.
+
+ .. versionadded:: 1.3.0
"""
__metaclass__ = ABCMeta
@@ -72,6 +77,7 @@ class Params(Identifiable):
self._params = None
@property
+ @since("1.3.0")
def params(self):
"""
Returns all params ordered by name. The default implementation
@@ -83,6 +89,7 @@ class Params(Identifiable):
[getattr(self, x) for x in dir(self) if x != "params"]))
return self._params
+ @since("1.4.0")
def explainParam(self, param):
"""
Explains a single param and returns its name, doc, and optional
@@ -100,6 +107,7 @@ class Params(Identifiable):
valueStr = "(" + ", ".join(values) + ")"
return "%s: %s %s" % (param.name, param.doc, valueStr)
+ @since("1.4.0")
def explainParams(self):
"""
Returns the documentation of all params with their optionally
@@ -107,6 +115,7 @@ class Params(Identifiable):
"""
return "\n".join([self.explainParam(param) for param in self.params])
+ @since("1.4.0")
def getParam(self, paramName):
"""
Gets a param by its name.
@@ -117,6 +126,7 @@ class Params(Identifiable):
else:
raise ValueError("Cannot find param with name %s." % paramName)
+ @since("1.4.0")
def isSet(self, param):
"""
Checks whether a param is explicitly set by user.
@@ -124,6 +134,7 @@ class Params(Identifiable):
param = self._resolveParam(param)
return param in self._paramMap
+ @since("1.4.0")
def hasDefault(self, param):
"""
Checks whether a param has a default value.
@@ -131,6 +142,7 @@ class Params(Identifiable):
param = self._resolveParam(param)
return param in self._defaultParamMap
+ @since("1.4.0")
def isDefined(self, param):
"""
Checks whether a param is explicitly set by user or has
@@ -138,6 +150,7 @@ class Params(Identifiable):
"""
return self.isSet(param) or self.hasDefault(param)
+ @since("1.4.0")
def hasParam(self, paramName):
"""
Tests whether this instance contains a param with a given
@@ -146,6 +159,7 @@ class Params(Identifiable):
param = self._resolveParam(paramName)
return param in self.params
+ @since("1.4.0")
def getOrDefault(self, param):
"""
Gets the value of a param in the user-supplied param map or its
@@ -157,6 +171,7 @@ class Params(Identifiable):
else:
return self._defaultParamMap[param]
+ @since("1.4.0")
def extractParamMap(self, extra=None):
"""
Extracts the embedded default param values and user-supplied
@@ -175,6 +190,7 @@ class Params(Identifiable):
paramMap.update(extra)
return paramMap
+ @since("1.4.0")
def copy(self, extra=None):
"""
Creates a copy of this instance with the same uid and some
diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py
index 312a8502b3..4475451edb 100644
--- a/python/pyspark/ml/pipeline.py
+++ b/python/pyspark/ml/pipeline.py
@@ -17,6 +17,7 @@
from abc import ABCMeta, abstractmethod
+from pyspark import since
from pyspark.ml.param import Param, Params
from pyspark.ml.util import keyword_only
from pyspark.mllib.common import inherit_doc
@@ -26,6 +27,8 @@ from pyspark.mllib.common import inherit_doc
class Estimator(Params):
"""
Abstract class for estimators that fit models to data.
+
+ .. versionadded:: 1.3.0
"""
__metaclass__ = ABCMeta
@@ -42,6 +45,7 @@ class Estimator(Params):
"""
raise NotImplementedError()
+ @since("1.3.0")
def fit(self, dataset, params=None):
"""
Fits a model to the input dataset with optional parameters.
@@ -73,6 +77,8 @@ class Transformer(Params):
"""
Abstract class for transformers that transform one dataset into
another.
+
+ .. versionadded:: 1.3.0
"""
__metaclass__ = ABCMeta
@@ -88,6 +94,7 @@ class Transformer(Params):
"""
raise NotImplementedError()
+ @since("1.3.0")
def transform(self, dataset, params=None):
"""
Transforms the input dataset with optional parameters.
@@ -113,6 +120,8 @@ class Transformer(Params):
class Model(Transformer):
"""
Abstract class for models that are fitted by estimators.
+
+ .. versionadded:: 1.4.0
"""
__metaclass__ = ABCMeta
@@ -136,6 +145,8 @@ class Pipeline(Estimator):
consists of fitted models and transformers, corresponding to the
pipeline stages. If there are no stages, the pipeline acts as an
identity transformer.
+
+ .. versionadded:: 1.3.0
"""
@keyword_only
@@ -151,6 +162,7 @@ class Pipeline(Estimator):
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
+ @since("1.3.0")
def setStages(self, value):
"""
Set pipeline stages.
@@ -161,6 +173,7 @@ class Pipeline(Estimator):
self._paramMap[self.stages] = value
return self
+ @since("1.3.0")
def getStages(self):
"""
Get pipeline stages.
@@ -169,6 +182,7 @@ class Pipeline(Estimator):
return self._paramMap[self.stages]
@keyword_only
+ @since("1.3.0")
def setParams(self, stages=None):
"""
setParams(self, stages=None)
@@ -204,7 +218,14 @@ class Pipeline(Estimator):
transformers.append(stage)
return PipelineModel(transformers)
+ @since("1.4.0")
def copy(self, extra=None):
+ """
+ Creates a copy of this instance.
+
+ :param extra: extra parameters
+ :returns: new instance
+ """
if extra is None:
extra = dict()
that = Params.copy(self, extra)
@@ -216,6 +237,8 @@ class Pipeline(Estimator):
class PipelineModel(Model):
"""
Represents a compiled pipeline with transformers and fitted models.
+
+ .. versionadded:: 1.3.0
"""
def __init__(self, stages):
@@ -227,7 +250,14 @@ class PipelineModel(Model):
dataset = t.transform(dataset)
return dataset
+ @since("1.4.0")
def copy(self, extra=None):
+ """
+ Creates a copy of this instance.
+
+ :param extra: extra parameters
+ :returns: new instance
+ """
if extra is None:
extra = dict()
stages = [stage.copy(extra) for stage in self.stages]