aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- python/pyspark/mllib/feature.py 15
1 files changed, 10 insertions, 5 deletions
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index f00bb93b7b..b5138773fd 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -111,6 +111,15 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer):
"""
def transform(self, vector):
+ """
+ Applies transformation on a vector or an RDD[Vector].
+
+ Note: In Python, transform cannot currently be used within
+ an RDD transformation or action.
+ Call transform directly on the RDD instead.
+
+ :param vector: Vector or RDD of Vector to be transformed.
+ """
if isinstance(vector, RDD):
vector = vector.map(_convert_to_vector)
else:
@@ -191,7 +200,7 @@ class StandardScaler(object):
Computes the mean and variance and stores as a model to be used
for later scaling.
- :param data: The data used to compute the mean and variance
+ :param dataset: The data used to compute the mean and variance
to build the transformation model.
:return: a StandardScalerModel
"""
@@ -346,10 +355,6 @@ class IDFModel(JavaVectorTransformer):
vector
:return: an RDD of TF-IDF vectors or a TF-IDF vector
"""
- if isinstance(x, RDD):
- return JavaVectorTransformer.transform(self, x)
-
- x = _convert_to_vector(x)
return JavaVectorTransformer.transform(self, x)
def idf(self):