diff options
author | MechCoder <manojkumarsivaraj334@gmail.com> | 2015-04-30 23:51:00 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-04-30 23:51:00 -0700 |
commit | c24aeb6a310b49dba8db1f4642531780a2e27253 (patch) | |
tree | 78a78ef9682b92eb57bccb91c445fefaf9bb238e | |
parent | 14b32886fa01aef6cc0dfbc263eb6d4c9d2876fa (diff) | |
download | spark-c24aeb6a310b49dba8db1f4642531780a2e27253.tar.gz spark-c24aeb6a310b49dba8db1f4642531780a2e27253.tar.bz2 spark-c24aeb6a310b49dba8db1f4642531780a2e27253.zip |
[SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation
Adds rank, recommendUsers and recommendProducts to MatrixFactorizationModel in PySpark.
Author: MechCoder <manojkumarsivaraj334@gmail.com>
Closes #5807 from MechCoder/spark-6257 and squashes the following commits:
09629c6 [MechCoder] doc
953b326 [MechCoder] [SPARK-6257] MLlib API missing items in Recommendation
-rw-r--r-- | docs/mllib-collaborative-filtering.md | 2 | ||||
-rw-r--r-- | python/pyspark/mllib/recommendation.py | 39 |
2 files changed, 40 insertions, 1 deletions
diff --git a/docs/mllib-collaborative-filtering.md b/docs/mllib-collaborative-filtering.md index 76140282a2..7b397e30b2 100644 --- a/docs/mllib-collaborative-filtering.md +++ b/docs/mllib-collaborative-filtering.md @@ -216,7 +216,7 @@ model = ALS.train(ratings, rank, numIterations) testdata = ratings.map(lambda p: (p[0], p[1])) predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2])) ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions) -MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).reduce(lambda x, y: x + y) / ratesAndPreds.count() +MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean() print("Mean Squared Error = " + str(MSE)) # Save and load model diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 4b7d17d64e..9c4647ddfd 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -65,6 +65,13 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): >>> model.userFeatures().collect() [(1, array('d', [...])), (2, array('d', [...]))] + >>> model.recommendUsers(1, 2) + [Rating(user=2, product=1, rating=1.9...), Rating(user=1, product=1, rating=1.0...)] + >>> model.recommendProducts(1, 2) + [Rating(user=1, product=2, rating=1.9...), Rating(user=1, product=1, rating=1.0...)] + >>> model.rank + 4 + >>> first_user = model.userFeatures().take(1)[0] >>> latents = first_user[1] >>> len(latents) == 4 @@ -105,9 +112,15 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): ... pass """ def predict(self, user, product): + """ + Predicts rating for the given user and product. + """ return self._java_model.predict(int(user), int(product)) def predictAll(self, user_product): + """ + Returns a list of predicted ratings for input user and product pairs. 
+ """ assert isinstance(user_product, RDD), "user_product should be RDD of (user, product)" first = user_product.first() assert len(first) == 2, "user_product should be RDD of (user, product)" @@ -115,11 +128,37 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): return self.call("predict", user_product) def userFeatures(self): + """ + Returns a paired RDD, where the first element is the user and the + second is an array of features corresponding to that user. + """ return self.call("getUserFeatures").mapValues(lambda v: array.array('d', v)) def productFeatures(self): + """ + Returns a paired RDD, where the first element is the product and the + second is an array of features corresponding to that product. + """ return self.call("getProductFeatures").mapValues(lambda v: array.array('d', v)) + def recommendUsers(self, product, num): + """ + Recommends the top "num" number of users for a given product and returns a list + of Rating objects sorted by the predicted rating in descending order. + """ + return list(self.call("recommendUsers", product, num)) + + def recommendProducts(self, user, num): + """ + Recommends the top "num" number of products for a given user and returns a list + of Rating objects sorted by the predicted rating in descending order. + """ + return list(self.call("recommendProducts", user, num)) + + @property + def rank(self): + return self.call("rank") + @classmethod def load(cls, sc, path): model = cls._load_java(sc, path) |