aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Yu ISHIKAWA <yuu.ishikawa@gmail.com> 2015-09-16 04:34:14 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-09-16 04:34:14 -0700
commit d9b7f3e4dbceb91ea4d1a1fed3ab847335f8588b (patch)
tree b66b1e7eabf64c404dbcbc29e4b134596bd73353 /python
parent 1894653edce718e874d1ddc9ba442bce43cbc082 (diff)
download spark-d9b7f3e4dbceb91ea4d1a1fed3ab847335f8588b.tar.gz
spark-d9b7f3e4dbceb91ea4d1a1fed3ab847335f8588b.tar.bz2
spark-d9b7f3e4dbceb91ea4d1a1fed3ab847335f8588b.zip
[SPARK-10276] [MLLIB] [PYSPARK] Add @since annotation to pyspark.mllib.recommendation
Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #8677 from yu-iskw/SPARK-10276.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/mllib/recommendation.py 36
1 files changed, 35 insertions, 1 deletions
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index 506ca2151c..95047b5b7b 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -18,7 +18,7 @@
import array
from collections import namedtuple
-from pyspark import SparkContext
+from pyspark import SparkContext, since
from pyspark.rdd import RDD
from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, inherit_doc
from pyspark.mllib.util import JavaLoader, JavaSaveable
@@ -36,6 +36,8 @@ class Rating(namedtuple("Rating", ["user", "product", "rating"])):
(1, 2, 5.0)
>>> (r[0], r[1], r[2])
(1, 2, 5.0)
+
+ .. versionadded:: 1.2.0
"""
def __reduce__(self):
@@ -111,13 +113,17 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
... rmtree(path)
... except OSError:
... pass
+
+ .. versionadded:: 0.9.0
"""
+ @since("0.9.0")
def predict(self, user, product):
"""
Predicts rating for the given user and product.
"""
return self._java_model.predict(int(user), int(product))
+ @since("0.9.0")
def predictAll(self, user_product):
"""
Returns a list of predicted ratings for input user and product pairs.
@@ -128,6 +134,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
user_product = user_product.map(lambda u_p: (int(u_p[0]), int(u_p[1])))
return self.call("predict", user_product)
+ @since("1.2.0")
def userFeatures(self):
"""
Returns a paired RDD, where the first element is the user and the
@@ -135,6 +142,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
return self.call("getUserFeatures").mapValues(lambda v: array.array('d', v))
+ @since("1.2.0")
def productFeatures(self):
"""
Returns a paired RDD, where the first element is the product and the
@@ -142,6 +150,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
return self.call("getProductFeatures").mapValues(lambda v: array.array('d', v))
+ @since("1.4.0")
def recommendUsers(self, product, num):
"""
Recommends the top "num" number of users for a given product and returns a list
@@ -149,6 +158,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
return list(self.call("recommendUsers", product, num))
+ @since("1.4.0")
def recommendProducts(self, user, num):
"""
Recommends the top "num" number of products for a given user and returns a list
@@ -157,17 +167,25 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
return list(self.call("recommendProducts", user, num))
@property
+ @since("1.4.0")
def rank(self):
+ """Rank for the features in this model"""
return self.call("rank")
@classmethod
+ @since("1.3.1")
def load(cls, sc, path):
+ """Load a model from the given path"""
model = cls._load_java(sc, path)
wrapper = sc._jvm.MatrixFactorizationModelWrapper(model)
return MatrixFactorizationModel(wrapper)
class ALS(object):
+ """Alternating Least Squares matrix factorization
+
+ .. versionadded:: 0.9.0
+ """
@classmethod
def _prepare(cls, ratings):
@@ -188,15 +206,31 @@ class ALS(object):
return ratings
@classmethod
+ @since("0.9.0")
def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False,
seed=None):
+ """
+ Train a matrix factorization model given an RDD of ratings given by users to some products,
+ in the form of (userID, productID, rating) pairs. We approximate the ratings matrix as the
+ product of two lower-rank matrices of a given rank (number of features). To solve for these
+ features, we run a given number of iterations of ALS. This is done using a level of
+ parallelism given by `blocks`.
+ """
model = callMLlibFunc("trainALSModel", cls._prepare(ratings), rank, iterations,
lambda_, blocks, nonnegative, seed)
return MatrixFactorizationModel(model)
@classmethod
+ @since("0.9.0")
def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01,
nonnegative=False, seed=None):
+ """
+ Train a matrix factorization model given an RDD of 'implicit preferences' given by users
+ to some products, in the form of (userID, productID, preference) pairs. We approximate the
+ ratings matrix as the product of two lower-rank matrices of a given rank (number of
+ features). To solve for these features, we run a given number of iterations of ALS.
+ This is done using a level of parallelism given by `blocks`.
+ """
model = callMLlibFunc("trainImplicitALSModel", cls._prepare(ratings), rank,
iterations, lambda_, blocks, alpha, nonnegative, seed)
return MatrixFactorizationModel(model)