diff options
author | Rishabh Bhardwaj <rbnext29@gmail.com> | 2015-11-09 14:27:36 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-11-09 14:27:36 -0800 |
commit | b7720fa45525cff6e812fa448d0841cb41f6c8a5 (patch) | |
tree | 934518fb170c41dcd2d6225f75dd0bb001476448 /examples/src/main/java | |
parent | 51d41e4b1a3a25a3fde3a4345afcfe4766023d23 (diff) | |
download | spark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.tar.gz spark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.tar.bz2 spark-b7720fa45525cff6e812fa448d0841cb41f6c8a5.zip |
[SPARK-11548][DOCS] Replaced example code in mllib-collaborative-filtering.md using include_example
Kindly review the changes.
Author: Rishabh Bhardwaj <rbnext29@gmail.com>
Closes #9519 from rishabhbhardwaj/SPARK-11337.
Diffstat (limited to 'examples/src/main/java')
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java new file mode 100644 index 0000000000..1065fde953 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.mllib; + +// $example on$ +import scala.Tuple2; + +import org.apache.spark.api.java.*; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.mllib.recommendation.ALS; +import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; +import org.apache.spark.mllib.recommendation.Rating; +import org.apache.spark.SparkConf; +// $example off$ + +public class JavaRecommendationExample { + public static void main(String args[]) { + // $example on$ + SparkConf conf = new SparkConf().setAppName("Java Collaborative Filtering Example"); + JavaSparkContext jsc = new JavaSparkContext(conf); + + // Load and parse the data + String path = "data/mllib/als/test.data"; + JavaRDD<String> data = jsc.textFile(path); + JavaRDD<Rating> ratings = data.map( + new Function<String, Rating>() { + public Rating call(String s) { + String[] sarray = s.split(","); + return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]), + Double.parseDouble(sarray[2])); + } + } + ); + + // Build the recommendation model using ALS + int rank = 10; + int numIterations = 10; + MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01); + + // Evaluate the model on rating data + JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map( + new Function<Rating, Tuple2<Object, Object>>() { + public Tuple2<Object, Object> call(Rating r) { + return new Tuple2<Object, Object>(r.user(), r.product()); + } + } + ); + JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD( + model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map( + new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() { + public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){ + return new Tuple2<Tuple2<Integer, Integer>, Double>( + new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating()); + } + } + )); + JavaRDD<Tuple2<Double, Double>> ratesAndPreds = + JavaPairRDD.fromJavaRDD(ratings.map( + new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() { + public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){ + return new Tuple2<Tuple2<Integer, Integer>, Double>( + new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating()); + } + } + )).join(predictions).values(); + double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map( + new Function<Tuple2<Double, Double>, Object>() { + public Object call(Tuple2<Double, Double> pair) { + Double err = pair._1() - pair._2(); + return err * err; + } + } + ).rdd()).mean(); + System.out.println("Mean Squared Error = " + MSE); + + // Save and load model + model.save(jsc.sc(), "target/tmp/myCollaborativeFilter"); + MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(jsc.sc(), + "target/tmp/myCollaborativeFilter"); + // $example off$ + } +} |