aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorRishabh Bhardwaj <rbnext29@gmail.com>2015-11-09 14:27:36 -0800
committerXiangrui Meng <meng@databricks.com>2015-11-09 14:27:36 -0800
commitb7720fa45525cff6e812fa448d0841cb41f6c8a5 (patch)
tree934518fb170c41dcd2d6225f75dd0bb001476448 /examples
parent51d41e4b1a3a25a3fde3a4345afcfe4766023d23 (diff)
downloadspark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.tar.gz
spark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.tar.bz2
spark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.zip
[SPARK-11548][DOCS] Replaced example code in mllib-collaborative-filtering.md using include_example
Kindly review the changes. Author: Rishabh Bhardwaj <rbnext29@gmail.com> Closes #9519 from rishabhbhardwaj/SPARK-11337.
Diffstat (limited to 'examples')
-rw-r--r--examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java97
-rw-r--r--examples/src/main/python/mllib/recommendation_example.py54
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/mllib/RecommendationExample.scala67
3 files changed, 218 insertions, 0 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
new file mode 100644
index 0000000000..1065fde953
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.mllib.recommendation.ALS;
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
+import org.apache.spark.mllib.recommendation.Rating;
+import org.apache.spark.SparkConf;
+// $example off$
+
+public class JavaRecommendationExample {
+ public static void main(String args[]) {
+ // $example on$
+ SparkConf conf = new SparkConf().setAppName("Java Collaborative Filtering Example");
+ JavaSparkContext jsc = new JavaSparkContext(conf);
+
+ // Load and parse the data
+ String path = "data/mllib/als/test.data";
+ JavaRDD<String> data = jsc.textFile(path);
+ JavaRDD<Rating> ratings = data.map(
+ new Function<String, Rating>() {
+ public Rating call(String s) {
+ String[] sarray = s.split(",");
+ return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]),
+ Double.parseDouble(sarray[2]));
+ }
+ }
+ );
+
+ // Build the recommendation model using ALS
+ int rank = 10;
+ int numIterations = 10;
+ MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01);
+
+ // Evaluate the model on rating data
+ JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map(
+ new Function<Rating, Tuple2<Object, Object>>() {
+ public Tuple2<Object, Object> call(Rating r) {
+ return new Tuple2<Object, Object>(r.user(), r.product());
+ }
+ }
+ );
+ JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(
+ model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map(
+ new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
+ public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
+ return new Tuple2<Tuple2<Integer, Integer>, Double>(
+ new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
+ }
+ }
+ ));
+ JavaRDD<Tuple2<Double, Double>> ratesAndPreds =
+ JavaPairRDD.fromJavaRDD(ratings.map(
+ new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
+ public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
+ return new Tuple2<Tuple2<Integer, Integer>, Double>(
+ new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
+ }
+ }
+ )).join(predictions).values();
+ double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map(
+ new Function<Tuple2<Double, Double>, Object>() {
+ public Object call(Tuple2<Double, Double> pair) {
+ Double err = pair._1() - pair._2();
+ return err * err;
+ }
+ }
+ ).rdd()).mean();
+ System.out.println("Mean Squared Error = " + MSE);
+
+ // Save and load model
+ model.save(jsc.sc(), "target/tmp/myCollaborativeFilter");
+ MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(jsc.sc(),
+ "target/tmp/myCollaborativeFilter");
+ // $example off$
+ }
+}
diff --git a/examples/src/main/python/mllib/recommendation_example.py b/examples/src/main/python/mllib/recommendation_example.py
new file mode 100644
index 0000000000..615db0749b
--- /dev/null
+++ b/examples/src/main/python/mllib/recommendation_example.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Collaborative Filtering Classification Example.
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+
+# $example on$
+from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
+# $example off$
+
+if __name__ == "__main__":
+ sc = SparkContext(appName="PythonCollaborativeFilteringExample")
+ # $example on$
+ # Load and parse the data
+ data = sc.textFile("data/mllib/als/test.data")
+ ratings = data.map(lambda l: l.split(','))\
+ .map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
+
+ # Build the recommendation model using Alternating Least Squares
+ rank = 10
+ numIterations = 10
+ model = ALS.train(ratings, rank, numIterations)
+
+ # Evaluate the model on training data
+ testdata = ratings.map(lambda p: (p[0], p[1]))
+ predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
+ ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
+ MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
+ print("Mean Squared Error = " + str(MSE))
+
+ # Save and load model
+ model.save(sc, "target/tmp/myCollaborativeFilter")
+ sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
+ # $example off$
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RecommendationExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RecommendationExample.scala
new file mode 100644
index 0000000000..64e4602465
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RecommendationExample.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkContext, SparkConf}
+// $example on$
+import org.apache.spark.mllib.recommendation.ALS
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
+import org.apache.spark.mllib.recommendation.Rating
+// $example off$
+
+object RecommendationExample {
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
+ val sc = new SparkContext(conf)
+ // $example on$
+ // Load and parse the data
+ val data = sc.textFile("data/mllib/als/test.data")
+ val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>
+ Rating(user.toInt, item.toInt, rate.toDouble)
+ })
+
+ // Build the recommendation model using ALS
+ val rank = 10
+ val numIterations = 10
+ val model = ALS.train(ratings, rank, numIterations, 0.01)
+
+ // Evaluate the model on rating data
+ val usersProducts = ratings.map { case Rating(user, product, rate) =>
+ (user, product)
+ }
+ val predictions =
+ model.predict(usersProducts).map { case Rating(user, product, rate) =>
+ ((user, product), rate)
+ }
+ val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
+ ((user, product), rate)
+ }.join(predictions)
+ val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
+ val err = (r1 - r2)
+ err * err
+ }.mean()
+ println("Mean Squared Error = " + MSE)
+
+ // Save and load model
+ model.save(sc, "target/tmp/myCollaborativeFilter")
+ val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
+ // $example off$
+ }
+}
+// scalastyle:on println