aboutsummaryrefslogtreecommitdiff
path: root/examples/src
diff options
context:
space:
mode:
author: Nick Pentreath <nickp@za.ibm.com> 2017-03-02 15:51:16 +0200
committer: Nick Pentreath <nickp@za.ibm.com> 2017-03-02 15:51:16 +0200
commit: 9cca3dbf4add9004a769dee1a556987e37230294 (patch)
tree: 8b4e17421300fce8038797c71bfa0d63c261ca87 /examples/src
parent: 50c08e82f011dd31b4ff7ff2b45fb9fb4c0e3231 (diff)
download: spark-9cca3dbf4add9004a769dee1a556987e37230294.tar.gz
spark-9cca3dbf4add9004a769dee1a556987e37230294.tar.bz2
spark-9cca3dbf4add9004a769dee1a556987e37230294.zip
[SPARK-19345][ML][DOC] Add doc for "coldStartStrategy" usage in ALS
[SPARK-14489](https://issues.apache.org/jira/browse/SPARK-14489) added the ability to skip `NaN` predictions during `ALSModel.transform`. This PR adds documentation for the `coldStartStrategy` param to the ALS user guide, and adds code to the examples to illustrate its usage. ## How was this patch tested? Doc and example change only. Built the HTML docs locally and verified that the example code builds and runs in the shell for Scala/Python. Author: Nick Pentreath <nickp@za.ibm.com> Closes #17102 from MLnick/SPARK-19345-coldstart-doc.
Diffstat (limited to 'examples/src')
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java | 2
-rw-r--r-- examples/src/main/python/ml/als_example.py | 4
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala | 2
3 files changed, 7 insertions, 1 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
index 33ba668b32..81970b7c81 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
@@ -103,6 +103,8 @@ public class JavaALSExample {
ALSModel model = als.fit(training);
// Evaluate the model by computing the RMSE on the test data
+ // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
+ model.setColdStartStrategy("drop");
Dataset<Row> predictions = model.transform(test);
RegressionEvaluator evaluator = new RegressionEvaluator()
diff --git a/examples/src/main/python/ml/als_example.py b/examples/src/main/python/ml/als_example.py
index 1a979ff5b5..2e7214ed56 100644
--- a/examples/src/main/python/ml/als_example.py
+++ b/examples/src/main/python/ml/als_example.py
@@ -44,7 +44,9 @@ if __name__ == "__main__":
(training, test) = ratings.randomSplit([0.8, 0.2])
# Build the recommendation model using ALS on the training data
- als = ALS(maxIter=5, regParam=0.01, userCol="userId", itemCol="movieId", ratingCol="rating")
+ # Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
+ als = ALS(maxIter=5, regParam=0.01, userCol="userId", itemCol="movieId", ratingCol="rating",
+ coldStartStrategy="drop")
model = als.fit(training)
# Evaluate the model by computing the RMSE on the test data
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
index bb5d163608..868f49b16f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
@@ -65,6 +65,8 @@ object ALSExample {
val model = als.fit(training)
// Evaluate the model by computing the RMSE on the test data
+ // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
+ model.setColdStartStrategy("drop")
val predictions = model.transform(test)
val evaluator = new RegressionEvaluator()