aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/python
diff options
context:
space:
mode:
authorRishabh Bhardwaj <rbnext29@gmail.com>2015-11-09 14:27:36 -0800
committerXiangrui Meng <meng@databricks.com>2015-11-09 14:27:36 -0800
commitb7720fa45525cff6e812fa448d0841cb41f6c8a5 (patch)
tree934518fb170c41dcd2d6225f75dd0bb001476448 /examples/src/main/python
parent51d41e4b1a3a25a3fde3a4345afcfe4766023d23 (diff)
downloadspark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.tar.gz
spark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.tar.bz2
spark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.zip
[SPARK-11548][DOCS] Replaced example code in mllib-collaborative-filtering.md using include_example
Kindly review the changes. Author: Rishabh Bhardwaj <rbnext29@gmail.com> Closes #9519 from rishabhbhardwaj/SPARK-11337.
Diffstat (limited to 'examples/src/main/python')
-rw-r--r--examples/src/main/python/mllib/recommendation_example.py54
1 files changed, 54 insertions, 0 deletions
diff --git a/examples/src/main/python/mllib/recommendation_example.py b/examples/src/main/python/mllib/recommendation_example.py
new file mode 100644
index 0000000000..615db0749b
--- /dev/null
+++ b/examples/src/main/python/mllib/recommendation_example.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Collaborative Filtering Recommendation Example.
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+
+# $example on$
+from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
+# $example off$
+
+if __name__ == "__main__":
+    # Script entry point: fit an ALS matrix-factorization model on the ratings
+    # file, print its mean squared error on that same data, then save the
+    # model and load it back.
+    sc = SparkContext(appName="PythonCollaborativeFilteringExample")
+    # $example on$
+    # Load and parse the data
+    # Each line is split on ',' and its first three fields are passed to
+    # Rating as (int, int, float).
+    data = sc.textFile("data/mllib/als/test.data")
+    ratings = data.map(lambda l: l.split(','))\
+        .map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
+
+    # Build the recommendation model using Alternating Least Squares
+    rank = 10
+    numIterations = 10
+    model = ALS.train(ratings, rank, numIterations)
+
+    # Evaluate the model on training data
+    # Key both the observed ratings and the predictions by their first two
+    # fields so that join() pairs each observed value with its prediction.
+    testdata = ratings.map(lambda p: (p[0], p[1]))
+    predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
+    ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
+    MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
+    print("Mean Squared Error = " + str(MSE))
+
+    # Save and load model
+    model.save(sc, "target/tmp/myCollaborativeFilter")
+    sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
+    # $example off$
+
+    # Shut the SparkContext down explicitly instead of relying on interpreter
+    # exit; kept outside the $example$ markers so the extracted doc snippet
+    # is unchanged.
+    sc.stop()