diff options
author | Rishabh Bhardwaj <rbnext29@gmail.com> | 2015-11-02 14:03:50 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-11-02 14:03:50 -0800 |
commit | 2804674a7af8f11eeb1280459bc9145815398eed (patch) | |
tree | abf1a9e6a9044019ede87155715ed38ab96fde14 /docs/mllib-isotonic-regression.md | |
parent | db11ee5e56e5fac59895c772a9a87c5ac86888ef (diff) | |
download | spark-2804674a7af8f11eeb1280459bc9145815398eed.tar.gz spark-2804674a7af8f11eeb1280459bc9145815398eed.tar.bz2 spark-2804674a7af8f11eeb1280459bc9145815398eed.zip |
[SPARK-11383][DOCS] Replaced example code in mllib-naive-bayes.md/mllib-isotonic-regression.md using include_example
I have made the required changes in mllib-naive-bayes.md/mllib-isotonic-regression.md and also verified them.
Kindly review it.
Author: Rishabh Bhardwaj <rbnext29@gmail.com>
Closes #9353 from rishabhbhardwaj/SPARK-11383.
Diffstat (limited to 'docs/mllib-isotonic-regression.md')
-rw-r--r-- | docs/mllib-isotonic-regression.md | 124 |
1 files changed, 3 insertions, 121 deletions
diff --git a/docs/mllib-isotonic-regression.md b/docs/mllib-isotonic-regression.md index f91a697b31..85f9226b43 100644 --- a/docs/mllib-isotonic-regression.md +++ b/docs/mllib-isotonic-regression.md @@ -61,42 +61,8 @@ labels and real labels in the test set. Refer to the [`IsotonicRegression` Scala docs](api/scala/index.html#org.apache.spark.mllib.regression.IsotonicRegression) and [`IsotonicRegressionModel` Scala docs](api/scala/index.html#org.apache.spark.mllib.regression.IsotonicRegressionModel) for details on the API. -{% highlight scala %} -import org.apache.spark.mllib.regression.{IsotonicRegression, IsotonicRegressionModel} - -val data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt") - -// Create label, feature, weight tuples from input data with weight set to default value 1.0. -val parsedData = data.map { line => - val parts = line.split(',').map(_.toDouble) - (parts(0), parts(1), 1.0) -} - -// Split data into training (60%) and test (40%) sets. -val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L) -val training = splits(0) -val test = splits(1) - -// Create isotonic regression model from training data. -// Isotonic parameter defaults to true so it is only shown for demonstration -val model = new IsotonicRegression().setIsotonic(true).run(training) - -// Create tuples of predicted and real labels. -val predictionAndLabel = test.map { point => - val predictedLabel = model.predict(point._2) - (predictedLabel, point._1) -} - -// Calculate mean squared error between predicted and real labels. 
-val meanSquaredError = predictionAndLabel.map{case(p, l) => math.pow((p - l), 2)}.mean() -println("Mean Squared Error = " + meanSquaredError) - -// Save and load model -model.save(sc, "myModelPath") -val sameModel = IsotonicRegressionModel.load(sc, "myModelPath") -{% endhighlight %} +{% include_example scala/org/apache/spark/examples/mllib/IsotonicRegressionExample.scala %} </div> - <div data-lang="java" markdown="1"> Data are read from a file where each line has a format label,feature i.e. 4710.28,500.00. The data are split to training and testing set. @@ -105,66 +71,8 @@ labels and real labels in the test set. Refer to the [`IsotonicRegression` Java docs](api/java/org/apache/spark/mllib/regression/IsotonicRegression.html) and [`IsotonicRegressionModel` Java docs](api/java/org/apache/spark/mllib/regression/IsotonicRegressionModel.html) for details on the API. -{% highlight java %} -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaDoubleRDD; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.Function; -import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.mllib.regression.IsotonicRegressionModel; -import scala.Tuple2; -import scala.Tuple3; - -JavaRDD<String> data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt"); - -// Create label, feature, weight tuples from input data with weight set to default value 1.0. -JavaRDD<Tuple3<Double, Double, Double>> parsedData = data.map( - new Function<String, Tuple3<Double, Double, Double>>() { - public Tuple3<Double, Double, Double> call(String line) { - String[] parts = line.split(","); - return new Tuple3<>(new Double(parts[0]), new Double(parts[1]), 1.0); - } - } -); - -// Split data into training (60%) and test (40%) sets. 
-JavaRDD<Tuple3<Double, Double, Double>>[] splits = parsedData.randomSplit(new double[] {0.6, 0.4}, 11L); -JavaRDD<Tuple3<Double, Double, Double>> training = splits[0]; -JavaRDD<Tuple3<Double, Double, Double>> test = splits[1]; - -// Create isotonic regression model from training data. -// Isotonic parameter defaults to true so it is only shown for demonstration -IsotonicRegressionModel model = new IsotonicRegression().setIsotonic(true).run(training); - -// Create tuples of predicted and real labels. -JavaPairRDD<Double, Double> predictionAndLabel = test.mapToPair( - new PairFunction<Tuple3<Double, Double, Double>, Double, Double>() { - @Override public Tuple2<Double, Double> call(Tuple3<Double, Double, Double> point) { - Double predictedLabel = model.predict(point._2()); - return new Tuple2<Double, Double>(predictedLabel, point._1()); - } - } -); - -// Calculate mean squared error between predicted and real labels. -Double meanSquaredError = new JavaDoubleRDD(predictionAndLabel.map( - new Function<Tuple2<Double, Double>, Object>() { - @Override public Object call(Tuple2<Double, Double> pl) { - return Math.pow(pl._1() - pl._2(), 2); - } - } -).rdd()).mean(); - -System.out.println("Mean Squared Error = " + meanSquaredError); - -// Save and load model -model.save(sc.sc(), "myModelPath"); -IsotonicRegressionModel sameModel = IsotonicRegressionModel.load(sc.sc(), "myModelPath"); -{% endhighlight %} +{% include_example java/org/apache/spark/examples/mllib/JavaIsotonicRegressionExample.java %} </div> - <div data-lang="python" markdown="1"> Data are read from a file where each line has a format label,feature i.e. 4710.28,500.00. The data are split to training and testing set. @@ -173,32 +81,6 @@ labels and real labels in the test set. 
Refer to the [`IsotonicRegression` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.regression.IsotonicRegression) and [`IsotonicRegressionModel` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.regression.IsotonicRegressionModel) for more details on the API. -{% highlight python %} -import math -from pyspark.mllib.regression import IsotonicRegression, IsotonicRegressionModel - -data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt") - -# Create label, feature, weight tuples from input data with weight set to default value 1.0. -parsedData = data.map(lambda line: tuple([float(x) for x in line.split(',')]) + (1.0,)) - -# Split data into training (60%) and test (40%) sets. -training, test = parsedData.randomSplit([0.6, 0.4], 11) - -# Create isotonic regression model from training data. -# Isotonic parameter defaults to true so it is only shown for demonstration -model = IsotonicRegression.train(training) - -# Create tuples of predicted and real labels. -predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0])) - -# Calculate mean squared error between predicted and real labels. -meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean() -print("Mean Squared Error = " + str(meanSquaredError)) - -# Save and load model -model.save(sc, "myModelPath") -sameModel = IsotonicRegressionModel.load(sc, "myModelPath") -{% endhighlight %} +{% include_example python/mllib/isotonic_regression_example.py %} </div> </div> |