aboutsummaryrefslogtreecommitdiff
path: root/docs/mllib-linear-methods.md
diff options
context:
space:
mode:
author: Sean Owen <sowen@cloudera.com> 2016-01-12 12:13:32 +0000
committer: Sean Owen <sowen@cloudera.com> 2016-01-12 12:13:32 +0000
commit: 9c7f34af37ef328149c1d66b4689d80a1589e1cc (patch)
tree: ef899b4df0e2bd15a8e95339407910069fcf0797 /docs/mllib-linear-methods.md
parent: c48f2a3a5fd714ad2ff19b29337e55583988431e (diff)
download: spark-9c7f34af37ef328149c1d66b4689d80a1589e1cc.tar.gz
spark-9c7f34af37ef328149c1d66b4689d80a1589e1cc.tar.bz2
spark-9c7f34af37ef328149c1d66b4689d80a1589e1cc.zip
[SPARK-5273][MLLIB][DOCS] Improve documentation examples for LinearRegression
Use a much smaller step size in LinearRegressionWithSGD MLlib examples to achieve a reasonable RMSE. Our training folks hit this exact same issue when concocting an example and had the same solution.

Author: Sean Owen <sowen@cloudera.com>

Closes #10675 from srowen/SPARK-5273.
Diffstat (limited to 'docs/mllib-linear-methods.md')
-rw-r--r-- docs/mllib-linear-methods.md 8
1 files changed, 5 insertions, 3 deletions
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md
index 20b35612ca..aac8f7560a 100644
--- a/docs/mllib-linear-methods.md
+++ b/docs/mllib-linear-methods.md
@@ -590,7 +590,8 @@ val parsedData = data.map { line =>
// Building the model
val numIterations = 100
-val model = LinearRegressionWithSGD.train(parsedData, numIterations)
+val stepSize = 0.00000001
+val model = LinearRegressionWithSGD.train(parsedData, numIterations, stepSize)
// Evaluate model on training examples and compute training error
val valuesAndPreds = parsedData.map { point =>
@@ -655,8 +656,9 @@ public class LinearRegression {
// Building the model
int numIterations = 100;
+ double stepSize = 0.00000001;
final LinearRegressionModel model =
- LinearRegressionWithSGD.train(JavaRDD.toRDD(parsedData), numIterations);
+ LinearRegressionWithSGD.train(JavaRDD.toRDD(parsedData), numIterations, stepSize);
// Evaluate model on training examples and compute training error
JavaRDD<Tuple2<Double, Double>> valuesAndPreds = parsedData.map(
@@ -706,7 +708,7 @@ data = sc.textFile("data/mllib/ridge-data/lpsa.data")
parsedData = data.map(parsePoint)
# Build the model
-model = LinearRegressionWithSGD.train(parsedData)
+model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001)
# Evaluate the model on training data
valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))