diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2015-06-29 18:50:23 -0700 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2015-06-29 18:50:23 -0700 |
commit | f9b6bf2f83d9dad273aa36d65d0560d35b941cc2 (patch) | |
tree | 01521b0e8f02a2ab59f3f428825de1a05e4fec27 /python/pyspark/mllib | |
parent | 4915e9e3bffb57eac319ef2173b4a6ae4073d25e (diff) | |
download | spark-f9b6bf2f83d9dad273aa36d65d0560d35b941cc2.tar.gz spark-f9b6bf2f83d9dad273aa36d65d0560d35b941cc2.tar.bz2 spark-f9b6bf2f83d9dad273aa36d65d0560d35b941cc2.zip |
[SPARK-7667] [MLLIB] MLlib Python API consistency check
MLlib Python API consistency check
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #6856 from yanboliang/spark-7667 and squashes the following commits:
21bae35 [Yanbo Liang] remove duplicate code
eb12f95 [Yanbo Liang] fix doc inherit problem
9e7ec3c [Yanbo Liang] address comments
e763d32 [Yanbo Liang] MLlib Python API consistency check
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- | python/pyspark/mllib/feature.py | 15 |
1 file changed, 10 insertions, 5 deletions
```diff
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index f00bb93b7b..b5138773fd 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -111,6 +111,15 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer):
     """
 
     def transform(self, vector):
+        """
+        Applies transformation on a vector or an RDD[Vector].
+
+        Note: In Python, transform cannot currently be used within
+              an RDD transformation or action.
+              Call transform directly on the RDD instead.
+
+        :param vector: Vector or RDD of Vector to be transformed.
+        """
         if isinstance(vector, RDD):
             vector = vector.map(_convert_to_vector)
         else:
@@ -191,7 +200,7 @@ class StandardScaler(object):
         Computes the mean and variance and stores as a model to be used
         for later scaling.
 
-        :param data: The data used to compute the mean and variance
+        :param dataset: The data used to compute the mean and variance
                      to build the transformation model.
         :return: a StandardScalarModel
         """
@@ -346,10 +355,6 @@ class IDFModel(JavaVectorTransformer):
                   vector
         :return: an RDD of TF-IDF vectors or a TF-IDF vector
         """
-        if isinstance(x, RDD):
-            return JavaVectorTransformer.transform(self, x)
-
-        x = _convert_to_vector(x)
         return JavaVectorTransformer.transform(self, x)
 
     def idf(self):
```