From f4fa61effe34dae2f0eab0bef57b2dee220cf92f Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Tue, 18 Aug 2015 12:55:36 -0700
Subject: [SPARK-10029] [MLLIB] [DOC] Add Python examples for mllib
 IsotonicRegression user guide

Add Python examples for mllib IsotonicRegression user guide

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #8225 from yanboliang/spark-10029.
---
 docs/mllib-isotonic-regression.md | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'docs/mllib-isotonic-regression.md')
diff --git a/docs/mllib-isotonic-regression.md b/docs/mllib-isotonic-regression.md
index 5732bc4c7e..6aa881f749 100644
--- a/docs/mllib-isotonic-regression.md
+++ b/docs/mllib-isotonic-regression.md
@@ -160,4 +160,39 @@ model.save(sc.sc(), "myModelPath");
 IsotonicRegressionModel sameModel = IsotonicRegressionModel.load(sc.sc(), "myModelPath");
 {% endhighlight %}
 </div>
+
+<div data-lang="python" markdown="1">
+Data are read from a file where each line has a format label,feature
+i.e. 4710.28,500.00. The data are split to training and testing set.
+Model is created using the training set and a mean squared error is calculated from the predicted
+labels and real labels in the test set.
+
+{% highlight python %}
+import math
+from pyspark.mllib.regression import IsotonicRegression, IsotonicRegressionModel
+
+data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt")
+
+# Create label, feature, weight tuples from input data with weight set to default value 1.0.
+parsedData = data.map(lambda line: tuple([float(x) for x in line.split(',')]) + (1.0,))
+
+# Split data into training (60%) and test (40%) sets.
+training, test = parsedData.randomSplit([0.6, 0.4], 11)
+
+# Create isotonic regression model from training data.
+# Isotonic parameter defaults to true so it is only shown for demonstration
+model = IsotonicRegression.train(training)
+
+# Create tuples of predicted and real labels.
+predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))
+
+# Calculate mean squared error between predicted and real labels.
+meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
+print("Mean Squared Error = " + str(meanSquaredError))
+
+# Save and load model
+model.save(sc, "myModelPath")
+sameModel = IsotonicRegressionModel.load(sc, "myModelPath")
+{% endhighlight %}
+</div>
 </div>
-- 
cgit v1.2.3