diff options
Diffstat (limited to 'python/pyspark/ml')
-rw-r--r-- | python/pyspark/ml/evaluation.py | 20 | ||||
-rw-r--r-- | python/pyspark/ml/feature.py | 164 | ||||
-rw-r--r-- | python/pyspark/ml/param/__init__.py | 16 | ||||
-rw-r--r-- | python/pyspark/ml/pipeline.py | 30 |
4 files changed, 230 insertions, 0 deletions
diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index cb3b07947e..dcc1738ec5 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -17,6 +17,7 @@ from abc import abstractmethod, ABCMeta +from pyspark import since from pyspark.ml.wrapper import JavaWrapper from pyspark.ml.param import Param, Params from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol @@ -31,6 +32,8 @@ __all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator', class Evaluator(Params): """ Base class for evaluators that compute metrics from predictions. + + .. versionadded:: 1.4.0 """ __metaclass__ = ABCMeta @@ -46,6 +49,7 @@ class Evaluator(Params): """ raise NotImplementedError() + @since("1.4.0") def evaluate(self, dataset, params=None): """ Evaluates the output with optional parameters. @@ -66,6 +70,7 @@ class Evaluator(Params): else: raise ValueError("Params must be a param map but got %s." % type(params)) + @since("1.5.0") def isLargerBetter(self): """ Indicates whether the metric returned by :py:meth:`evaluate` should be maximized @@ -114,6 +119,8 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction 0.70... >>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"}) 0.83... + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -138,6 +145,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction kwargs = self.__init__._input_kwargs self._set(**kwargs) + @since("1.4.0") def setMetricName(self, value): """ Sets the value of :py:attr:`metricName`. @@ -145,6 +153,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction self._paramMap[self.metricName] = value return self + @since("1.4.0") def getMetricName(self): """ Gets the value of metricName or its default value. @@ -152,6 +161,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction return self.getOrDefault(self.metricName) @keyword_only + @since("1.4.0") def setParams(self, rawPredictionCol="rawPrediction", labelCol="label", metricName="areaUnderROC"): """ @@ -180,6 +190,8 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): 0.993... >>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"}) 2.649... + + .. versionadded:: 1.4.0 """ # Because we will maximize evaluation value (ref: `CrossValidator`), # when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`), @@ -205,6 +217,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): kwargs = self.__init__._input_kwargs self._set(**kwargs) + @since("1.4.0") def setMetricName(self, value): """ Sets the value of :py:attr:`metricName`. @@ -212,6 +225,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): self._paramMap[self.metricName] = value return self + @since("1.4.0") def getMetricName(self): """ Gets the value of metricName or its default value. @@ -219,6 +233,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): return self.getOrDefault(self.metricName) @keyword_only + @since("1.4.0") def setParams(self, predictionCol="prediction", labelCol="label", metricName="rmse"): """ @@ -246,6 +261,8 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio 0.66... >>> evaluator.evaluate(dataset, {evaluator.metricName: "recall"}) 0.66... + + .. versionadded:: 1.5.0 """ # a placeholder to make it appear in the generated doc metricName = Param(Params._dummy(), "metricName", @@ -271,6 +288,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio kwargs = self.__init__._input_kwargs self._set(**kwargs) + @since("1.5.0") def setMetricName(self, value): """ Sets the value of :py:attr:`metricName`. @@ -278,6 +296,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio self._paramMap[self.metricName] = value return self + @since("1.5.0") def getMetricName(self): """ Gets the value of metricName or its default value. @@ -285,6 +304,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio return self.getOrDefault(self.metricName) @keyword_only + @since("1.5.0") def setParams(self, predictionCol="prediction", labelCol="label", metricName="f1"): """ diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 55bde6d0ea..c7b6dd926c 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -19,6 +19,7 @@ import sys if sys.version > '3': basestring = str +from pyspark import since from pyspark.rdd import ignore_unicode_prefix from pyspark.ml.param.shared import * from pyspark.ml.util import keyword_only @@ -51,6 +52,8 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): >>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"} >>> binarizer.transform(df, params).head().vector 1.0 + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -71,6 +74,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, threshold=0.0, inputCol=None, outputCol=None): """ setParams(self, threshold=0.0, inputCol=None, outputCol=None) @@ -79,6 +83,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setThreshold(self, value): """ Sets the value of :py:attr:`threshold`. @@ -86,6 +91,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.threshold] = value return self + @since("1.4.0") def getThreshold(self): """ Gets the value of threshold or its default value. @@ -114,6 +120,8 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): 2.0 >>> bucketizer.setParams(outputCol="b").transform(df).head().b 0.0 + + .. versionadded:: 1.3.0 """ # a placeholder to make it appear in the generated doc @@ -150,6 +158,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, splits=None, inputCol=None, outputCol=None): """ setParams(self, splits=None, inputCol=None, outputCol=None) @@ -158,6 +167,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setSplits(self, value): """ Sets the value of :py:attr:`splits`. @@ -165,6 +175,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.splits] = value return self + @since("1.4.0") def getSplits(self): """ Gets the value of threshold or its default value. @@ -194,6 +205,8 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol): ... >>> sorted(map(str, model.vocabulary)) ['a', 'b', 'c'] + + .. versionadded:: 1.6.0 """ # a placeholder to make it appear in the generated doc @@ -242,6 +255,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.6.0") def setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None): """ setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, inputCol=None, outputCol=None) @@ -250,6 +264,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.6.0") def setMinTF(self, value): """ Sets the value of :py:attr:`minTF`. @@ -257,12 +272,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.minTF] = value return self + @since("1.6.0") def getMinTF(self): """ Gets the value of minTF or its default value. """ return self.getOrDefault(self.minTF) + @since("1.6.0") def setMinDF(self, value): """ Sets the value of :py:attr:`minDF`. @@ -270,12 +287,14 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.minDF] = value return self + @since("1.6.0") def getMinDF(self): """ Gets the value of minDF or its default value. """ return self.getOrDefault(self.minDF) + @since("1.6.0") def setVocabSize(self, value): """ Sets the value of :py:attr:`vocabSize`. @@ -283,6 +302,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.vocabSize] = value return self + @since("1.6.0") def getVocabSize(self): """ Gets the value of vocabSize or its default value. @@ -298,9 +318,12 @@ class CountVectorizerModel(JavaModel): .. note:: Experimental Model fitted by CountVectorizer. + + .. versionadded:: 1.6.0 """ @property + @since("1.6.0") def vocabulary(self): """ An array of terms in the vocabulary. @@ -331,6 +354,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol): >>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2) >>> df3.head().origVec DenseVector([5.0, 8.0, 6.0]) + + .. versionadded:: 1.6.0 """ # a placeholder to make it appear in the generated doc @@ -351,6 +376,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.6.0") def setParams(self, inverse=False, inputCol=None, outputCol=None): """ setParams(self, inverse=False, inputCol=None, outputCol=None) @@ -359,6 +385,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.6.0") def setInverse(self, value): """ Sets the value of :py:attr:`inverse`. @@ -366,6 +393,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.inverse] = value return self + @since("1.6.0") def getInverse(self): """ Gets the value of inverse or its default value. @@ -390,6 +418,8 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol): DenseVector([2.0, 2.0, 9.0]) >>> ep.setParams(scalingVec=Vectors.dense([2.0, 3.0, 5.0])).transform(df).head().eprod DenseVector([4.0, 3.0, 15.0]) + + .. versionadded:: 1.5.0 """ # a placeholder to make it appear in the generated doc @@ -410,6 +440,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.5.0") def setParams(self, scalingVec=None, inputCol=None, outputCol=None): """ setParams(self, scalingVec=None, inputCol=None, outputCol=None) @@ -418,6 +449,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.5.0") def setScalingVec(self, value): """ Sets the value of :py:attr:`scalingVec`. @@ -425,6 +457,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.scalingVec] = value return self + @since("1.5.0") def getScalingVec(self): """ Gets the value of scalingVec or its default value. @@ -449,6 +482,8 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): >>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"} >>> hashingTF.transform(df, params).head().vector SparseVector(5, {2: 1.0, 3: 1.0, 4: 1.0}) + + .. versionadded:: 1.3.0 """ @keyword_only @@ -463,6 +498,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): self.setParams(**kwargs) @keyword_only + @since("1.3.0") def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None): """ setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None) @@ -490,6 +526,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol): >>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"} >>> idf.fit(df, params).transform(df).head().vector DenseVector([0.2877, 0.0]) + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -510,6 +548,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, minDocFreq=0, inputCol=None, outputCol=None): """ setParams(self, minDocFreq=0, inputCol=None, outputCol=None) @@ -518,6 +557,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setMinDocFreq(self, value): """ Sets the value of :py:attr:`minDocFreq`. @@ -525,6 +565,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.minDocFreq] = value return self + @since("1.4.0") def getMinDocFreq(self): """ Gets the value of minDocFreq or its default value. @@ -540,6 +581,8 @@ class IDFModel(JavaModel): .. note:: Experimental Model fitted by IDF. + + .. versionadded:: 1.4.0 """ @@ -571,6 +614,8 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol): |[2.0]| [1.0]| +-----+------+ ... + + .. versionadded:: 1.6.0 """ # a placeholder to make it appear in the generated doc @@ -591,6 +636,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.6.0") def setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None): """ setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None) @@ -599,6 +645,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.6.0") def setMin(self, value): """ Sets the value of :py:attr:`min`. @@ -606,12 +653,14 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.min] = value return self + @since("1.6.0") def getMin(self): """ Gets the value of min or its default value. """ return self.getOrDefault(self.min) + @since("1.6.0") def setMax(self, value): """ Sets the value of :py:attr:`max`. @@ -619,6 +668,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.max] = value return self + @since("1.6.0") def getMax(self): """ Gets the value of max or its default value. @@ -634,6 +684,8 @@ class MinMaxScalerModel(JavaModel): .. note:: Experimental Model fitted by :py:class:`MinMaxScaler`. + + .. versionadded:: 1.6.0 """ @@ -668,6 +720,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol): Traceback (most recent call last): ... TypeError: Method setParams forces keyword arguments. + + .. versionadded:: 1.5.0 """ # a placeholder to make it appear in the generated doc @@ -686,6 +740,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.5.0") def setParams(self, n=2, inputCol=None, outputCol=None): """ setParams(self, n=2, inputCol=None, outputCol=None) @@ -694,6 +749,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.5.0") def setN(self, value): """ Sets the value of :py:attr:`n`. @@ -701,6 +757,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.n] = value return self + @since("1.5.0") def getN(self): """ Gets the value of n or its default value. @@ -726,6 +783,8 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): >>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"} >>> normalizer.transform(df, params).head().vector DenseVector([0.4286, -0.5714]) + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -744,6 +803,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, p=2.0, inputCol=None, outputCol=None): """ setParams(self, p=2.0, inputCol=None, outputCol=None) @@ -752,6 +812,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setP(self, value): """ Sets the value of :py:attr:`p`. @@ -759,6 +820,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.p] = value return self + @since("1.4.0") def getP(self): """ Gets the value of p or its default value. @@ -800,6 +862,8 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): >>> params = {encoder.dropLast: False, encoder.outputCol: "test"} >>> encoder.transform(td, params).head().test SparseVector(3, {0: 1.0}) + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -818,6 +882,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, dropLast=True, inputCol=None, outputCol=None): """ setParams(self, dropLast=True, inputCol=None, outputCol=None) @@ -826,6 +891,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setDropLast(self, value): """ Sets the value of :py:attr:`dropLast`. @@ -833,6 +899,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.dropLast] = value return self + @since("1.4.0") def getDropLast(self): """ Gets the value of dropLast or its default value. @@ -858,6 +925,8 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) >>> px.setParams(outputCol="test").transform(df).head().test DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -877,6 +946,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, degree=2, inputCol=None, outputCol=None): """ setParams(self, degree=2, inputCol=None, outputCol=None) @@ -885,6 +955,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setDegree(self, value): """ Sets the value of :py:attr:`degree`. @@ -892,6 +963,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.degree] = value return self + @since("1.4.0") def getDegree(self): """ Gets the value of degree or its default value. @@ -929,6 +1001,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): Traceback (most recent call last): ... TypeError: Method setParams forces keyword arguments. + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -951,6 +1025,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None): """ setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None) @@ -959,6 +1034,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setMinTokenLength(self, value): """ Sets the value of :py:attr:`minTokenLength`. @@ -966,12 +1042,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.minTokenLength] = value return self + @since("1.4.0") def getMinTokenLength(self): """ Gets the value of minTokenLength or its default value. """ return self.getOrDefault(self.minTokenLength) + @since("1.4.0") def setGaps(self, value): """ Sets the value of :py:attr:`gaps`. @@ -979,12 +1057,14 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.gaps] = value return self + @since("1.4.0") def getGaps(self): """ Gets the value of gaps or its default value. """ return self.getOrDefault(self.gaps) + @since("1.4.0") def setPattern(self, value): """ Sets the value of :py:attr:`pattern`. @@ -992,6 +1072,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.pattern] = value return self + @since("1.4.0") def getPattern(self): """ Gets the value of pattern or its default value. @@ -1013,6 +1094,8 @@ class SQLTransformer(JavaTransformer): ... statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__") >>> sqlTrans.transform(df).head() Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0) + + .. versionadded:: 1.6.0 """ # a placeholder to make it appear in the generated doc @@ -1030,6 +1113,7 @@ class SQLTransformer(JavaTransformer): self.setParams(**kwargs) @keyword_only + @since("1.6.0") def setParams(self, statement=None): """ setParams(self, statement=None) @@ -1038,6 +1122,7 @@ class SQLTransformer(JavaTransformer): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.6.0") def setStatement(self, value): """ Sets the value of :py:attr:`statement`. @@ -1045,6 +1130,7 @@ class SQLTransformer(JavaTransformer): self._paramMap[self.statement] = value return self + @since("1.6.0") def getStatement(self): """ Gets the value of statement or its default value. @@ -1070,6 +1156,8 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): DenseVector([1.4142]) >>> model.transform(df).collect()[1].scaled DenseVector([1.4142]) + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -1090,6 +1178,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None): """ setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None) @@ -1098,6 +1187,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setWithMean(self, value): """ Sets the value of :py:attr:`withMean`. @@ -1105,12 +1195,14 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.withMean] = value return self + @since("1.4.0") def getWithMean(self): """ Gets the value of withMean or its default value. """ return self.getOrDefault(self.withMean) + @since("1.4.0") def setWithStd(self, value): """ Sets the value of :py:attr:`withStd`. @@ -1118,6 +1210,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.withStd] = value return self + @since("1.4.0") def getWithStd(self): """ Gets the value of withStd or its default value. @@ -1133,9 +1226,12 @@ class StandardScalerModel(JavaModel): .. note:: Experimental Model fitted by StandardScaler. + + .. versionadded:: 1.4.0 """ @property + @since("1.5.0") def std(self): """ Standard deviation of the StandardScalerModel. @@ -1143,6 +1239,7 @@ class StandardScalerModel(JavaModel): return self._call_java("std") @property + @since("1.5.0") def mean(self): """ Mean of the StandardScalerModel. @@ -1171,6 +1268,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid): >>> sorted(set([(i[0], str(i[1])) for i in itd.select(itd.id, itd.label2).collect()]), ... key=lambda x: x[0]) [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')] + + .. versionadded:: 1.4.0 """ @keyword_only @@ -1185,6 +1284,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, inputCol=None, outputCol=None, handleInvalid="error"): """ setParams(self, inputCol=None, outputCol=None, handleInvalid="error") @@ -1202,8 +1302,11 @@ class StringIndexerModel(JavaModel): .. note:: Experimental Model fitted by StringIndexer. + + .. versionadded:: 1.4.0 """ @property + @since("1.5.0") def labels(self): """ Ordered list of labels, corresponding to indices to be assigned. @@ -1221,6 +1324,8 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol): The index-string mapping is either from the ML attributes of the input column, or from user-supplied labels (which take precedence over ML attributes). See L{StringIndexer} for converting strings into indices. + + .. versionadded:: 1.6.0 """ # a placeholder to make the labels show up in generated doc @@ -1243,6 +1348,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.6.0") def setParams(self, inputCol=None, outputCol=None, labels=None): """ setParams(self, inputCol=None, outputCol=None, labels=None) @@ -1251,6 +1357,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.6.0") def setLabels(self, value): """ Sets the value of :py:attr:`labels`. @@ -1258,6 +1365,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.labels] = value return self + @since("1.6.0") def getLabels(self): """ Gets the value of :py:attr:`labels` or its default value. @@ -1271,6 +1379,8 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol): A feature transformer that filters out stop words from input. Note: null values from input array are preserved unless adding null to stopWords explicitly. + + .. versionadded:: 1.6.0 """ # a placeholder to make the stopwords show up in generated doc stopWords = Param(Params._dummy(), "stopWords", "The words to be filtered out") @@ -1297,6 +1407,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.6.0") def setParams(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False): """ @@ -1307,6 +1418,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.6.0") def setStopWords(self, value): """ Specify the stopwords to be filtered. @@ -1314,12 +1426,14 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.stopWords] = value return self + @since("1.6.0") def getStopWords(self): """ Get the stopwords. """ return self.getOrDefault(self.stopWords) + @since("1.6.0") def setCaseSensitive(self, value): """ Set whether to do a case sensitive comparison over the stop words @@ -1327,6 +1441,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.caseSensitive] = value return self + @since("1.6.0") def getCaseSensitive(self): """ Get whether to do a case sensitive comparison over the stop words. @@ -1360,6 +1475,8 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): Traceback (most recent call last): ... TypeError: Method setParams forces keyword arguments. + + .. versionadded:: 1.3.0 """ @keyword_only @@ -1373,6 +1490,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.3.0") def setParams(self, inputCol=None, outputCol=None): """ setParams(self, inputCol="input", outputCol="output") @@ -1398,6 +1516,8 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"} >>> vecAssembler.transform(df, params).head().vector DenseVector([0.0, 1.0]) + + .. versionadded:: 1.4.0 """ @keyword_only @@ -1411,6 +1531,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, inputCols=None, outputCol=None): """ setParams(self, inputCols=None, outputCol=None) @@ -1477,6 +1598,8 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): >>> model2 = indexer.fit(df, params) >>> model2.transform(df).head().vector DenseVector([1.0, 0.0]) + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -1501,6 +1624,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, maxCategories=20, inputCol=None, outputCol=None): """ setParams(self, maxCategories=20, inputCol=None, outputCol=None) @@ -1509,6 +1633,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setMaxCategories(self, value): """ Sets the value of :py:attr:`maxCategories`. @@ -1516,6 +1641,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.maxCategories] = value return self + @since("1.4.0") def getMaxCategories(self): """ Gets the value of maxCategories or its default value. @@ -1531,9 +1657,12 @@ class VectorIndexerModel(JavaModel): .. note:: Experimental Model fitted by VectorIndexer. + + .. versionadded:: 1.4.0 """ @property + @since("1.4.0") def numFeatures(self): """ Number of features, i.e., length of Vectors which this transforms. @@ -1541,6 +1670,7 @@ class VectorIndexerModel(JavaModel): return self._call_java("numFeatures") @property + @since("1.4.0") def categoryMaps(self): """ Feature value index. Keys are categorical feature indices (column indices). @@ -1573,6 +1703,8 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol): >>> vs = VectorSlicer(inputCol="features", outputCol="sliced", indices=[1, 4]) >>> vs.transform(df).head().sliced DenseVector([2.3, 1.0]) + + .. versionadded:: 1.6.0 """ # a placeholder to make it appear in the generated doc @@ -1600,6 +1732,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.6.0") def setParams(self, inputCol=None, outputCol=None, indices=None, names=None): """ setParams(self, inputCol=None, outputCol=None, indices=None, names=None): @@ -1608,6 +1741,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.6.0") def setIndices(self, value): """ Sets the value of :py:attr:`indices`. @@ -1615,12 +1749,14 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.indices] = value return self + @since("1.6.0") def getIndices(self): """ Gets the value of indices or its default value. """ return self.getOrDefault(self.indices) + @since("1.6.0") def setNames(self, value): """ Sets the value of :py:attr:`names`. @@ -1628,6 +1764,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol): self._paramMap[self.names] = value return self + @since("1.6.0") def getNames(self): """ Gets the value of names or its default value. @@ -1666,6 +1803,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has ... >>> model.transform(doc).head().model DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276]) + + .. versionadded:: 1.4.0 """ # a placeholder to make it appear in the generated doc @@ -1699,6 +1838,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has self.setParams(**kwargs) @keyword_only + @since("1.4.0") def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, inputCol=None, outputCol=None): """ @@ -1709,6 +1849,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.4.0") def setVectorSize(self, value): """ Sets the value of :py:attr:`vectorSize`. @@ -1716,12 +1857,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has self._paramMap[self.vectorSize] = value return self + @since("1.4.0") def getVectorSize(self): """ Gets the value of vectorSize or its default value. """ return self.getOrDefault(self.vectorSize) + @since("1.4.0") def setNumPartitions(self, value): """ Sets the value of :py:attr:`numPartitions`. @@ -1729,12 +1872,14 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has self._paramMap[self.numPartitions] = value return self + @since("1.4.0") def getNumPartitions(self): """ Gets the value of numPartitions or its default value. """ return self.getOrDefault(self.numPartitions) + @since("1.4.0") def setMinCount(self, value): """ Sets the value of :py:attr:`minCount`. @@ -1742,6 +1887,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has self._paramMap[self.minCount] = value return self + @since("1.4.0") def getMinCount(self): """ Gets the value of minCount or its default value. @@ -1757,8 +1903,11 @@ class Word2VecModel(JavaModel): .. note:: Experimental Model fitted by Word2Vec. + + .. versionadded:: 1.4.0 """ + @since("1.5.0") def getVectors(self): """ Returns the vector representation of the words as a dataframe @@ -1766,6 +1915,7 @@ class Word2VecModel(JavaModel): """ return self._call_java("getVectors") + @since("1.5.0") def findSynonyms(self, word, num): """ Find "num" number of words closest in similarity to "word". @@ -1794,6 +1944,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol): >>> model = pca.fit(df) >>> model.transform(df).collect()[0].pca_features DenseVector([1.648..., -4.013...]) + + .. versionadded:: 1.5.0 """ # a placeholder to make it appear in the generated doc @@ -1811,6 +1963,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol): self.setParams(**kwargs) @keyword_only + @since("1.5.0") def setParams(self, k=None, inputCol=None, outputCol=None): """ setParams(self, k=None, inputCol=None, outputCol=None) @@ -1819,6 +1972,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.5.0") def setK(self, value): """ Sets the value of :py:attr:`k`. @@ -1826,6 +1980,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol): self._paramMap[self.k] = value return self + @since("1.5.0") def getK(self): """ Gets the value of k or its default value. @@ -1841,6 +1996,8 @@ class PCAModel(JavaModel): .. note:: Experimental Model fitted by PCA. + + .. versionadded:: 1.5.0 """ @@ -1879,6 +2036,8 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol): |0.0|0.0| a| [0.0]| 0.0| +---+---+---+--------+-----+ ... + + .. versionadded:: 1.5.0 """ # a placeholder to make it appear in the generated doc @@ -1896,6 +2055,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol): self.setParams(**kwargs) @keyword_only + @since("1.5.0") def setParams(self, formula=None, featuresCol="features", labelCol="label"): """ setParams(self, formula=None, featuresCol="features", labelCol="label") @@ -1904,6 +2064,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + @since("1.5.0") def setFormula(self, value): """ Sets the value of :py:attr:`formula`. @@ -1911,6 +2072,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol): self._paramMap[self.formula] = value return self + @since("1.5.0") def getFormula(self): """ Gets the value of :py:attr:`formula`. @@ -1926,6 +2088,8 @@ class RFormulaModel(JavaModel): .. note:: Experimental Model fitted by :py:class:`RFormula`. + + .. versionadded:: 1.5.0 """ diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 2e0c63cb47..35c9b776a3 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -18,6 +18,7 @@ from abc import ABCMeta import copy +from pyspark import since from pyspark.ml.util import Identifiable @@ -27,6 +28,8 @@ __all__ = ['Param', 'Params'] class Param(object): """ A param with self-contained documentation. + + .. versionadded:: 1.3.0 """ def __init__(self, parent, name, doc): @@ -56,6 +59,8 @@ class Params(Identifiable): """ Components that take parameters. This also provides an internal param map to store parameter values attached to the instance. + + .. versionadded:: 1.3.0 """ __metaclass__ = ABCMeta @@ -72,6 +77,7 @@ class Params(Identifiable): self._params = None @property + @since("1.3.0") def params(self): """ Returns all params ordered by name. The default implementation @@ -83,6 +89,7 @@ class Params(Identifiable): [getattr(self, x) for x in dir(self) if x != "params"])) return self._params + @since("1.4.0") def explainParam(self, param): """ Explains a single param and returns its name, doc, and optional @@ -100,6 +107,7 @@ class Params(Identifiable): valueStr = "(" + ", ".join(values) + ")" return "%s: %s %s" % (param.name, param.doc, valueStr) + @since("1.4.0") def explainParams(self): """ Returns the documentation of all params with their optionally @@ -107,6 +115,7 @@ class Params(Identifiable): """ return "\n".join([self.explainParam(param) for param in self.params]) + @since("1.4.0") def getParam(self, paramName): """ Gets a param by its name. @@ -117,6 +126,7 @@ class Params(Identifiable): else: raise ValueError("Cannot find param with name %s." % paramName) + @since("1.4.0") def isSet(self, param): """ Checks whether a param is explicitly set by user. @@ -124,6 +134,7 @@ class Params(Identifiable): param = self._resolveParam(param) return param in self._paramMap + @since("1.4.0") def hasDefault(self, param): """ Checks whether a param has a default value. @@ -131,6 +142,7 @@ class Params(Identifiable): param = self._resolveParam(param) return param in self._defaultParamMap + @since("1.4.0") def isDefined(self, param): """ Checks whether a param is explicitly set by user or has @@ -138,6 +150,7 @@ class Params(Identifiable): """ return self.isSet(param) or self.hasDefault(param) + @since("1.4.0") def hasParam(self, paramName): """ Tests whether this instance contains a param with a given @@ -146,6 +159,7 @@ class Params(Identifiable): param = self._resolveParam(paramName) return param in self.params + @since("1.4.0") def getOrDefault(self, param): """ Gets the value of a param in the user-supplied param map or its @@ -157,6 +171,7 @@ class Params(Identifiable): else: return self._defaultParamMap[param] + @since("1.4.0") def extractParamMap(self, extra=None): """ Extracts the embedded default param values and user-supplied @@ -175,6 +190,7 @@ class Params(Identifiable): paramMap.update(extra) return paramMap + @since("1.4.0") def copy(self, extra=None): """ Creates a copy of this instance with the same uid and some diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index 312a8502b3..4475451edb 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -17,6 +17,7 @@ from abc import ABCMeta, abstractmethod +from pyspark import since from pyspark.ml.param import Param, Params from pyspark.ml.util import keyword_only from pyspark.mllib.common import inherit_doc @@ -26,6 +27,8 @@ from pyspark.mllib.common import inherit_doc class Estimator(Params): """ Abstract class for estimators that fit models to data. + + .. versionadded:: 1.3.0 """ __metaclass__ = ABCMeta @@ -42,6 +45,7 @@ class Estimator(Params): """ raise NotImplementedError() + @since("1.3.0") def fit(self, dataset, params=None): """ Fits a model to the input dataset with optional parameters. @@ -73,6 +77,8 @@ class Transformer(Params): """ Abstract class for transformers that transform one dataset into another. + + .. versionadded:: 1.3.0 """ __metaclass__ = ABCMeta @@ -88,6 +94,7 @@ class Transformer(Params): """ raise NotImplementedError() + @since("1.3.0") def transform(self, dataset, params=None): """ Transforms the input dataset with optional parameters. @@ -113,6 +120,8 @@ class Transformer(Params): class Model(Transformer): """ Abstract class for models that are fitted by estimators. + + .. versionadded:: 1.4.0 """ __metaclass__ = ABCMeta @@ -136,6 +145,8 @@ class Pipeline(Estimator): consists of fitted models and transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as an identity transformer. + + .. versionadded:: 1.3.0 """ @keyword_only @@ -151,6 +162,7 @@ class Pipeline(Estimator): kwargs = self.__init__._input_kwargs self.setParams(**kwargs) + @since("1.3.0") def setStages(self, value): """ Set pipeline stages. @@ -161,6 +173,7 @@ class Pipeline(Estimator): self._paramMap[self.stages] = value return self + @since("1.3.0") def getStages(self): """ Get pipeline stages. @@ -169,6 +182,7 @@ class Pipeline(Estimator): return self._paramMap[self.stages] @keyword_only + @since("1.3.0") def setParams(self, stages=None): """ setParams(self, stages=None) @@ -204,7 +218,14 @@ class Pipeline(Estimator): transformers.append(stage) return PipelineModel(transformers) + @since("1.4.0") def copy(self, extra=None): + """ + Creates a copy of this instance. + + :param extra: extra parameters + :returns: new instance + """ if extra is None: extra = dict() that = Params.copy(self, extra) @@ -216,6 +237,8 @@ class Pipeline(Estimator): class PipelineModel(Model): """ Represents a compiled pipeline with transformers and fitted models. + + .. versionadded:: 1.3.0 """ def __init__(self, stages): @@ -227,7 +250,14 @@ class PipelineModel(Model): dataset = t.transform(dataset) return dataset + @since("1.4.0") def copy(self, extra=None): + """ + Creates a copy of this instance. + + :param extra: extra parameters + :returns: new instance + """ if extra is None: extra = dict() stages = [stage.copy(extra) for stage in self.stages] |