author     Bryan Cutler <cutlerb@gmail.com>          2017-03-03 16:43:45 -0800
committer  Joseph K. Bradley <joseph@databricks.com> 2017-03-03 16:43:45 -0800
commit     44281ca81d4eda02b627ba21841108438b7d1c27 (patch)
tree       4125cfa2e8dd98e247ae7240d88f3845ce871734 /python/pyspark/ml/feature.py
parent     2a7921a813ecd847fd933ffef10edc64684e9df7 (diff)
download   spark-44281ca81d4eda02b627ba21841108438b7d1c27.tar.gz
           spark-44281ca81d4eda02b627ba21841108438b7d1c27.tar.bz2
           spark-44281ca81d4eda02b627ba21841108438b7d1c27.zip
[SPARK-19348][PYTHON] PySpark keyword_only decorator is not thread-safe
## What changes were proposed in this pull request?

The `keyword_only` decorator in PySpark is not thread-safe. It writes kwargs to a static class variable in the decorator, which is then retrieved later in the class method as `_input_kwargs`. If multiple threads construct the same class with different kwargs, they race to read the static class variable before another thread overwrites it. See [SPARK-19348](https://issues.apache.org/jira/browse/SPARK-19348) for reproduction code.

This change writes the kwargs to an instance member variable instead, so that multiple threads can operate on separate instances without the race condition. It does not protect against multiple threads operating on a single instance, but that is better left to the user to synchronize.

## How was this patch tested?

Added new unit tests for using the `keyword_only` decorator and a regression test that verifies `_input_kwargs` can be overwritten from different class instances.

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #16782 from BryanCutler/pyspark-keyword_only-threadsafe-SPARK-19348.
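The decorator fix itself lands in `python/pyspark/__init__.py`, and this page is limited to the `feature.py` call sites, so the following is a minimal before/after sketch approximating the decorator, not the verbatim patch:

```python
from functools import wraps

# Before (not thread-safe): kwargs are stashed on the wrapper function
# object, a single slot shared by every instance of the class.
def keyword_only_old(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        if len(args) > 1:
            raise TypeError("Method %s forces keyword arguments." % func.__name__)
        wrapper._input_kwargs = kwargs  # shared slot -> race condition
        return func(*args, **kwargs)
    return wrapper

# After (this change): kwargs are stashed on the instance, so threads
# constructing separate instances no longer interfere.
def keyword_only(func):
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        if len(args) > 0:
            raise TypeError("Method %s forces keyword arguments." % func.__name__)
        self._input_kwargs = kwargs  # per-instance slot
        return func(self, **kwargs)
    return wrapper
```

This is why every hunk below replaces `self.__init__._input_kwargs` and `self.setParams._input_kwargs` (attribute lookups that resolve to the shared wrapper function) with `self._input_kwargs` (an attribute on the instance).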
Diffstat (limited to 'python/pyspark/ml/feature.py')
-rwxr-xr-x  python/pyspark/ml/feature.py  120
1 file changed, 60 insertions, 60 deletions
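The commit message defers to the JIRA for the actual reproduction code; the self-contained toy below (the names `Toy`, `keyword_only_old`, and `build` are invented for illustration) shows the failure mode of the old decorator without requiring a SparkSession:

```python
import threading
import time
from functools import wraps

# Stand-in for the pre-fix decorator: kwargs land on the shared wrapper.
def keyword_only_old(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        wrapper._input_kwargs = kwargs
        time.sleep(0.01)  # widen the race window for the demo
        return func(*args, **kwargs)
    return wrapper

class Toy(object):
    @keyword_only_old
    def __init__(self, threshold=0.0):
        # Bound-method attribute lookup falls through to the wrapper
        # function, i.e. the slot every thread writes to.
        self.threshold = self.__init__._input_kwargs.get("threshold", 0.0)

results = {}

def build(name, value):
    results[name] = Toy(threshold=value).threshold

threads = [threading.Thread(target=build, args=("a", 0.5)),
           threading.Thread(target=build, args=("b", 1.0))]
for t in threads:
    t.start()
for t in threads:
    t.join()
# Often both constructors read the same kwargs, e.g. {'a': 1.0, 'b': 1.0}.
print(results)
```

With the patched decorator, each constructor reads its own `self._input_kwargs`, so `results` reliably comes back as `{'a': 0.5, 'b': 1.0}`.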
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 83cf763c2c..92f8549e9c 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -94,7 +94,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
super(Binarizer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Binarizer", self.uid)
self._setDefault(threshold=0.0)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -104,7 +104,7 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
setParams(self, threshold=0.0, inputCol=None, outputCol=None)
Sets params for this Binarizer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -265,7 +265,7 @@ class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutp
self._java_obj = \
self._new_java_obj("org.apache.spark.ml.feature.BucketedRandomProjectionLSH", self.uid)
self._setDefault(numHashTables=1)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -277,7 +277,7 @@ class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutp
bucketLength=None)
Sets params for this BucketedRandomProjectionLSH.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("2.2.0")
@@ -370,7 +370,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
super(Bucketizer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Bucketizer", self.uid)
self._setDefault(handleInvalid="error")
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -380,7 +380,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
setParams(self, splits=None, inputCol=None, outputCol=None, handleInvalid="error")
Sets params for this Bucketizer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -484,7 +484,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable,
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.CountVectorizer",
self.uid)
self._setDefault(minTF=1.0, minDF=1.0, vocabSize=1 << 18, binary=False)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -496,7 +496,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable,
outputCol=None)
Set the params for the CountVectorizer
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.6.0")
@@ -616,7 +616,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit
super(DCT, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.DCT", self.uid)
self._setDefault(inverse=False)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -626,7 +626,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit
setParams(self, inverse=False, inputCol=None, outputCol=None)
Sets params for this DCT.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.6.0")
@@ -680,7 +680,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada
super(ElementwiseProduct, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ElementwiseProduct",
self.uid)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -690,7 +690,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada
setParams(self, scalingVec=None, inputCol=None, outputCol=None)
Sets params for this ElementwiseProduct.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("2.0.0")
@@ -750,7 +750,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, Java
super(HashingTF, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.HashingTF", self.uid)
self._setDefault(numFeatures=1 << 18, binary=False)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -760,7 +760,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, Java
setParams(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None)
Sets params for this HashingTF.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("2.0.0")
@@ -823,7 +823,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
super(IDF, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IDF", self.uid)
self._setDefault(minDocFreq=0)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -833,7 +833,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
setParams(self, minDocFreq=0, inputCol=None, outputCol=None)
Sets params for this IDF.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -913,7 +913,7 @@ class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav
super(MaxAbsScaler, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MaxAbsScaler", self.uid)
self._setDefault()
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -923,7 +923,7 @@ class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav
setParams(self, inputCol=None, outputCol=None)
Sets params for this MaxAbsScaler.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
def _create_model(self, java_model):
@@ -1011,7 +1011,7 @@ class MinHashLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed,
super(MinHashLSH, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MinHashLSH", self.uid)
self._setDefault(numHashTables=1)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1021,7 +1021,7 @@ class MinHashLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed,
setParams(self, inputCol=None, outputCol=None, seed=None, numHashTables=1)
Sets params for this MinHashLSH.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
def _create_model(self, java_model):
@@ -1106,7 +1106,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav
super(MinMaxScaler, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MinMaxScaler", self.uid)
self._setDefault(min=0.0, max=1.0)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1116,7 +1116,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav
setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None)
Sets params for this MinMaxScaler.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.6.0")
@@ -1224,7 +1224,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr
super(NGram, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.NGram", self.uid)
self._setDefault(n=2)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1234,7 +1234,7 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr
setParams(self, n=2, inputCol=None, outputCol=None)
Sets params for this NGram.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.5.0")
@@ -1288,7 +1288,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
super(Normalizer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Normalizer", self.uid)
self._setDefault(p=2.0)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1298,7 +1298,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav
setParams(self, p=2.0, inputCol=None, outputCol=None)
Sets params for this Normalizer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -1368,7 +1368,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
super(OneHotEncoder, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.OneHotEncoder", self.uid)
self._setDefault(dropLast=True)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1378,7 +1378,7 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
setParams(self, dropLast=True, inputCol=None, outputCol=None)
Sets params for this OneHotEncoder.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -1434,7 +1434,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLRead
self._java_obj = self._new_java_obj(
"org.apache.spark.ml.feature.PolynomialExpansion", self.uid)
self._setDefault(degree=2)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1444,7 +1444,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLRead
setParams(self, degree=2, inputCol=None, outputCol=None)
Sets params for this PolynomialExpansion.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -1540,7 +1540,7 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadab
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.QuantileDiscretizer",
self.uid)
self._setDefault(numBuckets=2, relativeError=0.001, handleInvalid="error")
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1552,7 +1552,7 @@ class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadab
handleInvalid="error")
Set the params for the QuantileDiscretizer
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("2.0.0")
@@ -1665,7 +1665,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
super(RegexTokenizer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RegexTokenizer", self.uid)
self._setDefault(minTokenLength=1, gaps=True, pattern="\\s+", toLowercase=True)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1677,7 +1677,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
outputCol=None, toLowercase=True)
Sets params for this RegexTokenizer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -1768,7 +1768,7 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable):
"""
super(SQLTransformer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.SQLTransformer", self.uid)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1778,7 +1778,7 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable):
setParams(self, statement=None)
Sets params for this SQLTransformer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.6.0")
@@ -1847,7 +1847,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J
super(StandardScaler, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StandardScaler", self.uid)
self._setDefault(withMean=False, withStd=True)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1857,7 +1857,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J
setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None)
Sets params for this StandardScaler.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -1963,7 +1963,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
super(StringIndexer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StringIndexer", self.uid)
self._setDefault(handleInvalid="error")
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -1973,7 +1973,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
setParams(self, inputCol=None, outputCol=None, handleInvalid="error")
Sets params for this StringIndexer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
def _create_model(self, java_model):
@@ -2021,7 +2021,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
super(IndexToString, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString",
self.uid)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2031,7 +2031,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
setParams(self, inputCol=None, outputCol=None, labels=None)
Sets params for this IndexToString.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.6.0")
@@ -2085,7 +2085,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadabl
self.uid)
self._setDefault(stopWords=StopWordsRemover.loadDefaultStopWords("english"),
caseSensitive=False)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2095,7 +2095,7 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadabl
setParams(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False)
Sets params for this StopWordsRemover.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.6.0")
@@ -2178,7 +2178,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
"""
super(Tokenizer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Tokenizer", self.uid)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2188,7 +2188,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java
setParams(self, inputCol=None, outputCol=None)
Sets params for this Tokenizer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@@ -2222,7 +2222,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl
"""
super(VectorAssembler, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorAssembler", self.uid)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2232,7 +2232,7 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadabl
setParams(self, inputCols=None, outputCol=None)
Sets params for this VectorAssembler.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@@ -2320,7 +2320,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
super(VectorIndexer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorIndexer", self.uid)
self._setDefault(maxCategories=20)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2330,7 +2330,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
setParams(self, maxCategories=20, inputCol=None, outputCol=None)
Sets params for this VectorIndexer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -2435,7 +2435,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J
super(VectorSlicer, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorSlicer", self.uid)
self._setDefault(indices=[], names=[])
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2445,7 +2445,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J
setParams(self, inputCol=None, outputCol=None, indices=None, names=None)
Sets params for this VectorSlicer.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.6.0")
@@ -2558,7 +2558,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
windowSize=5, maxSentenceLength=1000)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2570,7 +2570,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000)
Sets params for this Word2Vec.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.4.0")
@@ -2718,7 +2718,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
"""
super(PCA, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.PCA", self.uid)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2728,7 +2728,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
setParams(self, k=None, inputCol=None, outputCol=None)
Set params for this PCA.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.5.0")
@@ -2858,7 +2858,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM
super(RFormula, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RFormula", self.uid)
self._setDefault(forceIndexLabel=False)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -2870,7 +2870,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM
forceIndexLabel=False)
Sets params for RFormula.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("1.5.0")
@@ -3017,7 +3017,7 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ChiSqSelector", self.uid)
self._setDefault(numTopFeatures=50, selectorType="numTopFeatures", percentile=0.1,
fpr=0.05, fdr=0.05, fwe=0.05)
- kwargs = self.__init__._input_kwargs
+ kwargs = self._input_kwargs
self.setParams(**kwargs)
@keyword_only
@@ -3031,7 +3031,7 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja
fdr=0.05, fwe=0.05)
Sets params for this ChiSqSelector.
"""
- kwargs = self.setParams._input_kwargs
+ kwargs = self._input_kwargs
return self._set(**kwargs)
@since("2.1.0")