aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author: MechCoder <manojkumarsivaraj334@gmail.com> 2015-04-30 23:51:00 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-04-30 23:51:00 -0700
commit: c24aeb6a310b49dba8db1f4642531780a2e27253 (patch)
tree: 78a78ef9682b92eb57bccb91c445fefaf9bb238e /python
parent: 14b32886fa01aef6cc0dfbc263eb6d4c9d2876fa (diff)
download: spark-c24aeb6a310b49dba8db1f4642531780a2e27253.tar.gz
spark-c24aeb6a310b49dba8db1f4642531780a2e27253.tar.bz2
spark-c24aeb6a310b49dba8db1f4642531780a2e27253.zip
[SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation
Adds rank, recommendUsers and recommendProducts to MatrixFactorizationModel in PySpark. Author: MechCoder <manojkumarsivaraj334@gmail.com>. Closes #5807 from MechCoder/spark-6257 and squashes the following commits: 09629c6 [MechCoder] doc; 953b326 [MechCoder] [SPARK-6257] MLlib API missing items in Recommendation.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/mllib/recommendation.py | 39
1 files changed, 39 insertions, 0 deletions
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index 4b7d17d64e..9c4647ddfd 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -65,6 +65,13 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
>>> model.userFeatures().collect()
[(1, array('d', [...])), (2, array('d', [...]))]
+ >>> model.recommendUsers(1, 2)
+ [Rating(user=2, product=1, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
+ >>> model.recommendProducts(1, 2)
+ [Rating(user=1, product=2, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
+ >>> model.rank
+ 4
+
>>> first_user = model.userFeatures().take(1)[0]
>>> latents = first_user[1]
>>> len(latents) == 4
@@ -105,9 +112,15 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
... pass
"""
def predict(self, user, product):
+ """
+ Predicts rating for the given user and product.
+ """
return self._java_model.predict(int(user), int(product))
def predictAll(self, user_product):
+ """
+ Returns a list of predicted ratings for input user and product pairs.
+ """
assert isinstance(user_product, RDD), "user_product should be RDD of (user, product)"
first = user_product.first()
assert len(first) == 2, "user_product should be RDD of (user, product)"
@@ -115,11 +128,37 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
return self.call("predict", user_product)
def userFeatures(self):
+ """
+ Returns a paired RDD, where the first element is the user and the
+ second is an array of features corresponding to that user.
+ """
return self.call("getUserFeatures").mapValues(lambda v: array.array('d', v))
def productFeatures(self):
+ """
+ Returns a paired RDD, where the first element is the product and the
+ second is an array of features corresponding to that product.
+ """
return self.call("getProductFeatures").mapValues(lambda v: array.array('d', v))
+ def recommendUsers(self, product, num):
+ """
+ Recommends the top "num" number of users for a given product and returns a list
+ of Rating objects sorted by the predicted rating in descending order.
+ """
+ return list(self.call("recommendUsers", product, num))
+
+ def recommendProducts(self, user, num):
+ """
+ Recommends the top "num" number of products for a given user and returns a list
+ of Rating objects sorted by the predicted rating in descending order.
+ """
+ return list(self.call("recommendProducts", user, num))
+
+ @property
+ def rank(self):
+ return self.call("rank")
+
@classmethod
def load(cls, sc, path):
model = cls._load_java(sc, path)