aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author: Yanbo Liang <ybliang8@gmail.com>, 2015-06-29 18:50:23 -0700
committer: Joseph K. Bradley <joseph@databricks.com>, 2015-06-29 18:50:23 -0700
commit: f9b6bf2f83d9dad273aa36d65d0560d35b941cc2 (patch)
tree: 01521b0e8f02a2ab59f3f428825de1a05e4fec27 /python
parent: 4915e9e3bffb57eac319ef2173b4a6ae4073d25e (diff)
download: spark-f9b6bf2f83d9dad273aa36d65d0560d35b941cc2.tar.gz
spark-f9b6bf2f83d9dad273aa36d65d0560d35b941cc2.tar.bz2
spark-f9b6bf2f83d9dad273aa36d65d0560d35b941cc2.zip
[SPARK-7667] [MLLIB] MLlib Python API consistency check
MLlib Python API consistency check. Author: Yanbo Liang <ybliang8@gmail.com>. Closes #6856 from yanboliang/spark-7667 and squashes the following commits: 21bae35 [Yanbo Liang] remove duplicate code; eb12f95 [Yanbo Liang] fix doc inherit problem; 9e7ec3c [Yanbo Liang] address comments; e763d32 [Yanbo Liang] MLlib Python API consistency check.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/mllib/feature.py 15
1 file changed, 10 insertions, 5 deletions
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index f00bb93b7b..b5138773fd 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -111,6 +111,15 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer):
"""
def transform(self, vector):
+ """
+ Applies transformation on a vector or an RDD[Vector].
+
+ Note: In Python, transform cannot currently be used within
+ an RDD transformation or action.
+ Call transform directly on the RDD instead.
+
+ :param vector: Vector or RDD of Vector to be transformed.
+ """
if isinstance(vector, RDD):
vector = vector.map(_convert_to_vector)
else:
@@ -191,7 +200,7 @@ class StandardScaler(object):
Computes the mean and variance and stores as a model to be used
for later scaling.
- :param data: The data used to compute the mean and variance
+ :param dataset: The data used to compute the mean and variance
to build the transformation model.
:return: a StandardScalarModel
"""
@@ -346,10 +355,6 @@ class IDFModel(JavaVectorTransformer):
vector
:return: an RDD of TF-IDF vectors or a TF-IDF vector
"""
- if isinstance(x, RDD):
- return JavaVectorTransformer.transform(self, x)
-
- x = _convert_to_vector(x)
return JavaVectorTransformer.transform(self, x)
def idf(self):