From f4fa61effe34dae2f0eab0bef57b2dee220cf92f Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 18 Aug 2015 12:55:36 -0700 Subject: [SPARK-10029] [MLLIB] [DOC] Add Python examples for mllib IsotonicRegression user guide Add Python examples for mllib IsotonicRegression user guide Author: Yanbo Liang Closes #8225 from yanboliang/spark-10029. --- docs/mllib-isotonic-regression.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'docs/mllib-isotonic-regression.md') diff --git a/docs/mllib-isotonic-regression.md b/docs/mllib-isotonic-regression.md index 5732bc4c7e..6aa881f749 100644 --- a/docs/mllib-isotonic-regression.md +++ b/docs/mllib-isotonic-regression.md @@ -160,4 +160,39 @@ model.save(sc.sc(), "myModelPath"); IsotonicRegressionModel sameModel = IsotonicRegressionModel.load(sc.sc(), "myModelPath"); {% endhighlight %} + +
+Data are read from a file where each line has the format label,feature,
+e.g. 4710.28,500.00. The data are split into training and test sets.
+A model is created using the training set, and the mean squared error is calculated from the
+predicted labels and real labels in the test set.
+
+{% highlight python %}
+import math
+from pyspark.mllib.regression import IsotonicRegression, IsotonicRegressionModel
+
+data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt")
+
+# Create label, feature, weight tuples from input data with weight set to default value 1.0.
+parsedData = data.map(lambda line: tuple([float(x) for x in line.split(',')]) + (1.0,))
+
+# Split data into training (60%) and test (40%) sets.
+training, test = parsedData.randomSplit([0.6, 0.4], 11)
+
+# Create isotonic regression model from training data.
+# The isotonic parameter defaults to True, so it is only shown here for demonstration.
+model = IsotonicRegression.train(training, isotonic=True)
+
+# Create tuples of predicted and real labels.
+predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))
+
+# Calculate mean squared error between predicted and real labels.
+meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
+print("Mean Squared Error = " + str(meanSquaredError))
+
+# Save and load model
+model.save(sc, "myModelPath")
+sameModel = IsotonicRegressionModel.load(sc, "myModelPath")
+{% endhighlight %}
+
-- cgit v1.2.3