From 5b3245d6dff65972fc39c73f90d5cbdf84d19129 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Mon, 31 Aug 2015 15:50:41 -0700 Subject: [SPARK-8472] [ML] [PySpark] Python API for DCT Add Python API for ml.feature.DCT. Author: Yanbo Liang Closes #8485 from yanboliang/spark-8472. --- python/pyspark/ml/feature.py | 65 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'python/pyspark/ml/feature.py') diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 04b2b2ccc9..59300a6078 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -26,7 +26,7 @@ from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaTransformer from pyspark.mllib.common import inherit_doc from pyspark.mllib.linalg import _convert_to_vector -__all__ = ['Binarizer', 'Bucketizer', 'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel', +__all__ = ['Binarizer', 'Bucketizer', 'DCT', 'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel', 'NGram', 'Normalizer', 'OneHotEncoder', 'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler', 'StandardScalerModel', 'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer', 'Word2Vec', 'Word2VecModel', @@ -166,6 +166,69 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): return self.getOrDefault(self.splits) +@inherit_doc +class DCT(JavaTransformer, HasInputCol, HasOutputCol): + """ + A feature transformer that takes the 1D discrete cosine transform + of a real vector. No zero padding is performed on the input vector. + It returns a real vector of the same length representing the DCT. + The return vector is scaled such that the transform matrix is + unitary (aka scaled DCT-II). + + More information on + `Wikipedia <https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II>`_.
+ + >>> from pyspark.mllib.linalg import Vectors + >>> df1 = sqlContext.createDataFrame([(Vectors.dense([5.0, 8.0, 6.0]),)], ["vec"]) + >>> dct = DCT(inverse=False, inputCol="vec", outputCol="resultVec") + >>> df2 = dct.transform(df1) + >>> df2.head().resultVec + DenseVector([10.969..., -0.707..., -2.041...]) + >>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2) + >>> df3.head().origVec + DenseVector([5.0, 8.0, 6.0]) + """ + + # a placeholder to make it appear in the generated doc + inverse = Param(Params._dummy(), "inverse", "Set transformer to perform inverse DCT, " + + "default False.") + + @keyword_only + def __init__(self, inverse=False, inputCol=None, outputCol=None): + """ + __init__(self, inverse=False, inputCol=None, outputCol=None) + """ + super(DCT, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.DCT", self.uid) + self.inverse = Param(self, "inverse", "Set transformer to perform inverse DCT, " + + "default False.") + self._setDefault(inverse=False) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, inverse=False, inputCol=None, outputCol=None): + """ + setParams(self, inverse=False, inputCol=None, outputCol=None) + Sets params for this DCT. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setInverse(self, value): + """ + Sets the value of :py:attr:`inverse`. + """ + self._paramMap[self.inverse] = value + return self + + def getInverse(self): + """ + Gets the value of inverse or its default value. + """ + return self.getOrDefault(self.inverse) + + @inherit_doc class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol): """ -- cgit v1.2.3