author    Pravin Gadakh <pravingadakh177@gmail.com>    2015-11-10 14:47:04 -0800
committer Xiangrui Meng <meng@databricks.com>          2015-11-10 14:47:04 -0800
commit    638c51d9380081b3b8182be2c2460bd53b8b0a4f (patch)
tree      ffece4d1f0a6832fd600a04e53638cdddf29c9aa /docs
parent    724cf7a38c551bf2a79b87a8158bbe1725f9f888 (diff)
[SPARK-11550][DOCS] Replace example code in mllib-optimization.md using include_example
Author: Pravin Gadakh <pravingadakh177@gmail.com>

Closes #9516 from pravingadakh/SPARK-11550.
Diffstat (limited to 'docs')
-rw-r--r--  docs/mllib-optimization.md | 145
1 file changed, 2 insertions(+), 143 deletions(-)
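For readers unfamiliar with the mechanism: the `include_example` Jekyll tag used below pulls source code out of the compilable `examples/` module and into the generated docs page, so snippets no longer have to be duplicated (and allowed to rot) inside the markdown. A minimal sketch of the convention, assuming the standard `// $example on$` / `// $example off$` delimiters recognized by the docs build; the file and its contents here are purely illustrative:

{% highlight scala %}
// examples/src/main/scala/.../SomeExample.scala (hypothetical file)
object SomeExample {
  def main(args: Array[String]): Unit = {
    // Setup (SparkContext creation, argument parsing) stays out of the docs.
    // $example on$
    println("only the code between the markers is rendered into the page")
    // $example off$
    // Teardown stays out of the docs as well.
  }
}
{% endhighlight %}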
diff --git a/docs/mllib-optimization.md b/docs/mllib-optimization.md
index a3bd130ba0..ad7bcd9bfd 100644
--- a/docs/mllib-optimization.md
+++ b/docs/mllib-optimization.md
@@ -220,154 +220,13 @@ L-BFGS optimizer.
<div data-lang="scala" markdown="1">
Refer to the [`LBFGS` Scala docs](api/scala/index.html#org.apache.spark.mllib.optimization.LBFGS) and [`SquaredL2Updater` Scala docs](api/scala/index.html#org.apache.spark.mllib.optimization.SquaredL2Updater) for details on the API.
-{% highlight scala %}
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.mllib.classification.LogisticRegressionModel
-import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}
-
-val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
-val numFeatures = data.take(1)(0).features.size
-
-// Split data into training (60%) and test (40%).
-val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
-
-// Append 1 into the training data as intercept.
-val training = splits(0).map(x => (x.label, MLUtils.appendBias(x.features))).cache()
-
-val test = splits(1)
-
-// Run training algorithm to build the model
-val numCorrections = 10
-val convergenceTol = 1e-4
-val maxNumIterations = 20
-val regParam = 0.1
-val initialWeightsWithIntercept = Vectors.dense(new Array[Double](numFeatures + 1))
-
-val (weightsWithIntercept, loss) = LBFGS.runLBFGS(
- training,
- new LogisticGradient(),
- new SquaredL2Updater(),
- numCorrections,
- convergenceTol,
- maxNumIterations,
- regParam,
- initialWeightsWithIntercept)
-
-val model = new LogisticRegressionModel(
- Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1)),
- weightsWithIntercept(weightsWithIntercept.size - 1))
-
-// Clear the default threshold.
-model.clearThreshold()
-
-// Compute raw scores on the test set.
-val scoreAndLabels = test.map { point =>
- val score = model.predict(point.features)
- (score, point.label)
-}
-
-// Get evaluation metrics.
-val metrics = new BinaryClassificationMetrics(scoreAndLabels)
-val auROC = metrics.areaUnderROC()
-
-println("Loss of each step in training process")
-loss.foreach(println)
-println("Area under ROC = " + auROC)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/mllib/LBFGSExample.scala %}
</div>
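For reference, a plausible shape for the extracted file follows. The body is the Scala code removed above; the enclosing object, the SparkContext setup and teardown, and the placement of the `$example on$` / `$example off$` markers are assumptions about the layout of LBFGSExample.scala, not a verbatim copy of the committed file:

{% highlight scala %}
// Sketch of examples/src/main/scala/org/apache/spark/examples/mllib/LBFGSExample.scala
import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.LogisticRegressionModel
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}
import org.apache.spark.mllib.util.MLUtils
// $example off$

object LBFGSExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("LBFGSExample")
    val sc = new SparkContext(conf)

    // $example on$
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
    val numFeatures = data.take(1)(0).features.size

    // Split data into training (60%) and test (40%).
    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)

    // Append 1 into the training data as intercept.
    val training = splits(0).map(x => (x.label, MLUtils.appendBias(x.features))).cache()

    val test = splits(1)

    // Run the training algorithm to build the model.
    val numCorrections = 10
    val convergenceTol = 1e-4
    val maxNumIterations = 20
    val regParam = 0.1
    val initialWeightsWithIntercept = Vectors.dense(new Array[Double](numFeatures + 1))

    val (weightsWithIntercept, loss) = LBFGS.runLBFGS(
      training,
      new LogisticGradient(),
      new SquaredL2Updater(),
      numCorrections,
      convergenceTol,
      maxNumIterations,
      regParam,
      initialWeightsWithIntercept)

    // Split the appended intercept back out of the learned weight vector.
    val model = new LogisticRegressionModel(
      Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1)),
      weightsWithIntercept(weightsWithIntercept.size - 1))

    // Clear the default threshold so predict() returns raw scores.
    model.clearThreshold()

    // Compute raw scores on the test set.
    val scoreAndLabels = test.map { point =>
      val score = model.predict(point.features)
      (score, point.label)
    }

    // Get evaluation metrics.
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    val auROC = metrics.areaUnderROC()

    println("Loss of each step in training process")
    loss.foreach(println)
    println("Area under ROC = " + auROC)
    // $example off$

    sc.stop()
  }
}
{% endhighlight %}

Keeping the snippet in a compiled example means the docs build breaks if the API drifts, which is the point of the change.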
<div data-lang="java" markdown="1">
Refer to the [`LBFGS` Java docs](api/java/org/apache/spark/mllib/optimization/LBFGS.html) and [`SquaredL2Updater` Java docs](api/java/org/apache/spark/mllib/optimization/SquaredL2Updater.html) for details on the API.
-{% highlight java %}
-import java.util.Arrays;
-import java.util.Random;
-
-import scala.Tuple2;
-
-import org.apache.spark.api.java.*;
-import org.apache.spark.api.java.function.Function;
-import org.apache.spark.mllib.classification.LogisticRegressionModel;
-import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.optimization.*;
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.util.MLUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-
-public class LBFGSExample {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("L-BFGS Example");
- SparkContext sc = new SparkContext(conf);
- String path = "data/mllib/sample_libsvm_data.txt";
- JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();
- int numFeatures = data.take(1).get(0).features().size();
-
- // Split initial RDD into two... [60% training data, 40% testing data].
- JavaRDD<LabeledPoint> trainingInit = data.sample(false, 0.6, 11L);
- JavaRDD<LabeledPoint> test = data.subtract(trainingInit);
-
- // Append 1 into the training data as intercept.
- JavaRDD<Tuple2<Object, Vector>> training = trainingInit.map(
- new Function<LabeledPoint, Tuple2<Object, Vector>>() {
- public Tuple2<Object, Vector> call(LabeledPoint p) {
- return new Tuple2<Object, Vector>(p.label(), MLUtils.appendBias(p.features()));
- }
- });
- training.cache();
-
- // Run training algorithm to build the model.
- int numCorrections = 10;
- double convergenceTol = 1e-4;
- int maxNumIterations = 20;
- double regParam = 0.1;
- Vector initialWeightsWithIntercept = Vectors.dense(new double[numFeatures + 1]);
-
- Tuple2<Vector, double[]> result = LBFGS.runLBFGS(
- training.rdd(),
- new LogisticGradient(),
- new SquaredL2Updater(),
- numCorrections,
- convergenceTol,
- maxNumIterations,
- regParam,
- initialWeightsWithIntercept);
- Vector weightsWithIntercept = result._1();
- double[] loss = result._2();
-
- final LogisticRegressionModel model = new LogisticRegressionModel(
- Vectors.dense(Arrays.copyOf(weightsWithIntercept.toArray(), weightsWithIntercept.size() - 1)),
- (weightsWithIntercept.toArray())[weightsWithIntercept.size() - 1]);
-
- // Clear the default threshold.
- model.clearThreshold();
-
- // Compute raw scores on the test set.
- JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map(
- new Function<LabeledPoint, Tuple2<Object, Object>>() {
- public Tuple2<Object, Object> call(LabeledPoint p) {
- Double score = model.predict(p.features());
- return new Tuple2<Object, Object>(score, p.label());
- }
- });
-
- // Get evaluation metrics.
- BinaryClassificationMetrics metrics =
- new BinaryClassificationMetrics(scoreAndLabels.rdd());
- double auROC = metrics.areaUnderROC();
-
- System.out.println("Loss of each step in training process");
- for (double l : loss)
- System.out.println(l);
- System.out.println("Area under ROC = " + auROC);
- }
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/mllib/JavaLBFGSExample.java %}
</div>
</div>
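The Java listing presumably receives the same treatment: the block removed above moves into examples/src/main/java/org/apache/spark/examples/mllib/JavaLBFGSExample.java with its body wrapped in the same marker comments, so both language tabs now render from compilable sources.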