about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/feature.py
diff options
context:
space:
mode:
author zlpmichelle <zlpmichelle@gmail.com> 2016-02-26 14:37:44 -0800
committer Xiangrui Meng <meng@databricks.com> 2016-02-26 14:37:44 -0800
commit 1e5fcdf96c0176a11e5f425ba539b6ed629281db (patch)
tree d0c6a8511c4640753dc9f93fa3ce1cb0226c4418 /python/pyspark/ml/feature.py
parent 391755dc6ed2e156b8df8a530ac8df6ed7ba7f8a (diff)
download spark-1e5fcdf96c0176a11e5f425ba539b6ed629281db.tar.gz
spark-1e5fcdf96c0176a11e5f425ba539b6ed629281db.tar.bz2
spark-1e5fcdf96c0176a11e5f425ba539b6ed629281db.zip
[SPARK-13505][ML] add python api for MaxAbsScaler
## What changes were proposed in this pull request?

After SPARK-13028, we should add a Python API for MaxAbsScaler.

## How was this patch tested?

Unit test.

Author: zlpmichelle <zlpmichelle@gmail.com>

Closes #11393 from zlpmichelle/master.
Diffstat (limited to 'python/pyspark/ml/feature.py')
-rw-r--r-- python/pyspark/ml/feature.py 75
1 files changed, 68 insertions, 7 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 67bccfae7a..369f3508fd 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -28,13 +28,14 @@ from pyspark.mllib.common import inherit_doc
from pyspark.mllib.linalg import _convert_to_vector
__all__ = ['Binarizer', 'Bucketizer', 'CountVectorizer', 'CountVectorizerModel', 'DCT',
- 'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel', 'IndexToString', 'MinMaxScaler',
- 'MinMaxScalerModel', 'NGram', 'Normalizer', 'OneHotEncoder', 'PCA', 'PCAModel',
- 'PolynomialExpansion', 'QuantileDiscretizer', 'RegexTokenizer', 'RFormula',
- 'RFormulaModel', 'SQLTransformer', 'StandardScaler', 'StandardScalerModel',
- 'StopWordsRemover', 'StringIndexer', 'StringIndexerModel', 'Tokenizer',
- 'VectorAssembler', 'VectorIndexer', 'VectorSlicer', 'Word2Vec', 'Word2VecModel',
- 'ChiSqSelector', 'ChiSqSelectorModel']
+ 'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel', 'IndexToString',
+ 'MaxAbsScaler', 'MaxAbsScalerModel', 'MinMaxScaler', 'MinMaxScalerModel',
+ 'NGram', 'Normalizer', 'OneHotEncoder', 'PCA', 'PCAModel', 'PolynomialExpansion',
+ 'QuantileDiscretizer', 'RegexTokenizer', 'RFormula', 'RFormulaModel',
+ 'SQLTransformer', 'StandardScaler', 'StandardScalerModel', 'StopWordsRemover',
+ 'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler',
+ 'VectorIndexer', 'VectorSlicer', 'Word2Vec', 'Word2VecModel', 'ChiSqSelector',
+ 'ChiSqSelectorModel']
@inherit_doc
@@ -545,6 +546,66 @@ class IDFModel(JavaModel):
@inherit_doc
+class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol):
+ """
+ .. note:: Experimental
+
+ Rescale each feature individually to range [-1, 1] by dividing by the maximum absolute
+ value in each feature. It does not shift/center the data, and thus does not destroy
+ any sparsity.
+
+ >>> from pyspark.mllib.linalg import Vectors
+ >>> df = sqlContext.createDataFrame([(Vectors.dense([1.0]),), (Vectors.dense([2.0]),)], ["a"])
+ >>> maScaler = MaxAbsScaler(inputCol="a", outputCol="scaled")
+ >>> model = maScaler.fit(df)
+ >>> model.transform(df).show()
+ +-----+------+
+ | a|scaled|
+ +-----+------+
+ |[1.0]| [0.5]|
+ |[2.0]| [1.0]|
+ +-----+------+
+ ...
+
+ .. versionadded:: 2.0.0
+ """
+
+ @keyword_only
+ def __init__(self, inputCol=None, outputCol=None):
+ """
+ __init__(self, inputCol=None, outputCol=None)
+ """
+ super(MaxAbsScaler, self).__init__()
+ self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MaxAbsScaler", self.uid)
+ self._setDefault()
+ kwargs = self.__init__._input_kwargs
+ self.setParams(**kwargs)
+
+ @keyword_only
+ @since("2.0.0")
+ def setParams(self, inputCol=None, outputCol=None):
+ """
+ setParams(self, inputCol=None, outputCol=None)
+ Sets params for this MaxAbsScaler.
+ """
+ kwargs = self.setParams._input_kwargs
+ return self._set(**kwargs)
+
+ def _create_model(self, java_model):
+ return MaxAbsScalerModel(java_model)
+
+
+class MaxAbsScalerModel(JavaModel):
+ """
+ .. note:: Experimental
+
+ Model fitted by :py:class:`MaxAbsScaler`.
+
+ .. versionadded:: 2.0.0
+ """
+
+
+@inherit_doc
class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol):
"""
.. note:: Experimental