From 44281ca81d4eda02b627ba21841108438b7d1c27 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 3 Mar 2017 16:43:45 -0800 Subject: [SPARK-19348][PYTHON] PySpark keyword_only decorator is not thread-safe ## What changes were proposed in this pull request? The `keyword_only` decorator in PySpark is not thread-safe. It writes kwargs to a static class variable in the decorator, which is then retrieved later in the class method as `_input_kwargs`. If multiple threads construct the same class with different kwargs, there is a race condition: one thread can overwrite the static class variable before another thread has read its own kwargs from it. See [SPARK-19348](https://issues.apache.org/jira/browse/SPARK-19348) for reproduction code. This change writes the kwargs to a member variable of the instance instead, so that multiple threads can operate on separate instances without the race condition. It does not protect against multiple threads operating on a single instance, but that is better left to the user to synchronize. ## How was this patch tested? Added new unit tests for using the keyword_only decorator and a regression test that verifies `_input_kwargs` can be overwritten from different class instances. Author: Bryan Cutler Closes #16782 from BryanCutler/pyspark-keyword_only-threadsafe-SPARK-19348. 
--- python/pyspark/ml/feature.py | 120 +++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 60 deletions(-) (limited to 'python/pyspark/ml/feature.py') diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 83cf763c2c..92f8549e9c 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -94,7 +94,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java super(Binarizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Binarizer", self.uid) self._setDefault(threshold=0.0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -104,7 +104,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java setParams(self, threshold=0.0, inputCol=None, outputCol=None) Sets params for this Binarizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -265,7 +265,7 @@ class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutp self._java_obj = \ self._new_java_obj("org.apache.spark.ml.feature.BucketedRandomProjectionLSH", self.uid) self._setDefault(numHashTables=1) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -277,7 +277,7 @@ class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutp bucketLength=None) Sets params for this BucketedRandomProjectionLSH. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.2.0") @@ -370,7 +370,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav super(Bucketizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Bucketizer", self.uid) self._setDefault(handleInvalid="error") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -380,7 +380,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav setParams(self, splits=None, inputCol=None, outputCol=None, handleInvalid="error") Sets params for this Bucketizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -484,7 +484,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.CountVectorizer", self.uid) self._setDefault(minTF=1.0, minDF=1.0, vocabSize=1 << 18, binary=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -496,7 +496,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, outputCol=None) Set the params for the CountVectorizer """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -616,7 +616,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit super(DCT, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.DCT", self.uid) self._setDefault(inverse=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -626,7 +626,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit setParams(self, inverse=False, inputCol=None, outputCol=None) Sets params for 
this DCT. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -680,7 +680,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada super(ElementwiseProduct, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ElementwiseProduct", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -690,7 +690,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada setParams(self, scalingVec=None, inputCol=None, outputCol=None) Sets params for this ElementwiseProduct. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -750,7 +750,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, Java super(HashingTF, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.HashingTF", self.uid) self._setDefault(numFeatures=1 << 18, binary=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -760,7 +760,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, Java setParams(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None) Sets params for this HashingTF. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -823,7 +823,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab super(IDF, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IDF", self.uid) self._setDefault(minDocFreq=0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -833,7 +833,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab setParams(self, minDocFreq=0, inputCol=None, outputCol=None) Sets params for this IDF. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -913,7 +913,7 @@ class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav super(MaxAbsScaler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MaxAbsScaler", self.uid) self._setDefault() - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -923,7 +923,7 @@ class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav setParams(self, inputCol=None, outputCol=None) Sets params for this MaxAbsScaler. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1011,7 +1011,7 @@ class MinHashLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed, super(MinHashLSH, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MinHashLSH", self.uid) self._setDefault(numHashTables=1) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1021,7 +1021,7 @@ class MinHashLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed, setParams(self, inputCol=None, outputCol=None, seed=None, numHashTables=1) Sets params for this MinHashLSH. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1106,7 +1106,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav super(MinMaxScaler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MinMaxScaler", self.uid) self._setDefault(min=0.0, max=1.0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1116,7 +1116,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None) Sets params for this MinMaxScaler. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -1224,7 +1224,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr super(NGram, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.NGram", self.uid) self._setDefault(n=2) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1234,7 +1234,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr setParams(self, n=2, inputCol=None, outputCol=None) Sets params for this NGram. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.5.0") @@ -1288,7 +1288,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav super(Normalizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Normalizer", self.uid) self._setDefault(p=2.0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1298,7 +1298,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav setParams(self, p=2.0, inputCol=None, outputCol=None) Sets params for this Normalizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1368,7 +1368,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, super(OneHotEncoder, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.OneHotEncoder", self.uid) self._setDefault(dropLast=True) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1378,7 +1378,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, setParams(self, dropLast=True, inputCol=None, outputCol=None) Sets params for this OneHotEncoder. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1434,7 +1434,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLRead self._java_obj = self._new_java_obj( "org.apache.spark.ml.feature.PolynomialExpansion", self.uid) self._setDefault(degree=2) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1444,7 +1444,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLRead setParams(self, degree=2, inputCol=None, outputCol=None) Sets params for this PolynomialExpansion. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1540,7 +1540,7 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadab self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.QuantileDiscretizer", self.uid) self._setDefault(numBuckets=2, relativeError=0.001, handleInvalid="error") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1552,7 +1552,7 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadab handleInvalid="error") Set the params for the QuantileDiscretizer """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -1665,7 +1665,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, super(RegexTokenizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RegexTokenizer", self.uid) self._setDefault(minTokenLength=1, gaps=True, pattern="\\s+", toLowercase=True) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1677,7 +1677,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, outputCol=None, toLowercase=True) 
Sets params for this RegexTokenizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1768,7 +1768,7 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable): """ super(SQLTransformer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.SQLTransformer", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1778,7 +1778,7 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable): setParams(self, statement=None) Sets params for this SQLTransformer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -1847,7 +1847,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J super(StandardScaler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StandardScaler", self.uid) self._setDefault(withMean=False, withStd=True) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1857,7 +1857,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None) Sets params for this StandardScaler. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1963,7 +1963,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, super(StringIndexer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StringIndexer", self.uid) self._setDefault(handleInvalid="error") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1973,7 +1973,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, setParams(self, inputCol=None, outputCol=None, handleInvalid="error") Sets params for this StringIndexer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -2021,7 +2021,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, super(IndexToString, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2031,7 +2031,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, setParams(self, inputCol=None, outputCol=None, labels=None) Sets params for this IndexToString. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -2085,7 +2085,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadabl self.uid) self._setDefault(stopWords=StopWordsRemover.loadDefaultStopWords("english"), caseSensitive=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2095,7 +2095,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadabl setParams(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=false) Sets params for this StopWordRemover. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -2178,7 +2178,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java """ super(Tokenizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Tokenizer", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2188,7 +2188,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java setParams(self, inputCol=None, outputCol=None) Sets params for this Tokenizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @@ -2222,7 +2222,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl """ super(VectorAssembler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorAssembler", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2232,7 +2232,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl setParams(self, inputCols=None, outputCol=None) Sets params for this VectorAssembler. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @@ -2320,7 +2320,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja super(VectorIndexer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorIndexer", self.uid) self._setDefault(maxCategories=20) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2330,7 +2330,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja setParams(self, maxCategories=20, inputCol=None, outputCol=None) Sets params for this VectorIndexer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -2435,7 +2435,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J super(VectorSlicer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorSlicer", self.uid) self._setDefault(indices=[], names=[]) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2445,7 +2445,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J setParams(self, inputCol=None, outputCol=None, indices=None, names=None): Sets params for this VectorSlicer. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -2558,7 +2558,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid) self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, windowSize=5, maxSentenceLength=1000) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2570,7 +2570,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000) Sets params for this Word2Vec. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -2718,7 +2718,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab """ super(PCA, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.PCA", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2728,7 +2728,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab setParams(self, k=None, inputCol=None, outputCol=None) Set params for this PCA. 
""" - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.5.0") @@ -2858,7 +2858,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM super(RFormula, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RFormula", self.uid) self._setDefault(forceIndexLabel=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2870,7 +2870,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM forceIndexLabel=False) Sets params for RFormula. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.5.0") @@ -3017,7 +3017,7 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ChiSqSelector", self.uid) self._setDefault(numTopFeatures=50, selectorType="numTopFeatures", percentile=0.1, fpr=0.05, fdr=0.05, fwe=0.05) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -3031,7 +3031,7 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja fdr=0.05, fwe=0.05) Sets params for this ChiSqSelector. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.1.0") -- cgit v1.2.3