[SPARK-11381][DOCS] Replace example code in mllib-linear-methods.md using include_example

## What changes were proposed in this pull request? This PR replaces example codes in `mllib-linear-methods.md` using `include_example` by doing the followings: * Extracts the example codes(Scala,Java,Python) as files in `example` module. * Merges some dialog-style examples into a single file. * Hide redundant codes in HTML for the consistency with other docs. ## How was the this patch tested? manual test. This PR can be tested by document generations, `SKIP_API=1 jekyll build`. Author: Dongjoon Hyun <dongjoon@apache.org> Closes #11320 from dongjoon-hyun/SPARK-11381.
author: Dongjoon Hyun <dongjoon@apache.org> 2016-02-26 08:31:55 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-02-26 08:31:55 -0800
commit: 7af0de076f74e975c9235c88b0f11b22fcbae060 (patch)
tree: 0dfcb6c7eb3213c7d37ed203e7f513112bfbba02 /examples/src/main/scala
parent: b33261f91387904c5aaccae40f86922c92a4e09a (diff)
download: spark-7af0de076f74e975c9235c88b0f11b22fcbae060.tar.gz
spark-7af0de076f74e975c9235c88b0f11b22fcbae060.tar.bz2
spark-7af0de076f74e975c9235c88b0f11b22fcbae060.zip
4 files changed, 261 insertions, 0 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala
new file mode 100644
index 0000000000..669868787e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.LinearRegressionModel
+import org.apache.spark.mllib.regression.LinearRegressionWithSGD
+// $example off$
+
+object LinearRegressionWithSGDExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("LinearRegressionWithSGDExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data
+    val data = sc.textFile("data/mllib/ridge-data/lpsa.data")
+    val parsedData = data.map { line =>
+      val parts = line.split(',')
+      LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
+    }.cache()
+
+    // Building the model
+    val numIterations = 100
+    val stepSize = 0.00000001
+    val model = LinearRegressionWithSGD.train(parsedData, numIterations, stepSize)
+
+    // Evaluate model on training examples and compute training error
+    val valuesAndPreds = parsedData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val MSE = valuesAndPreds.map{ case(v, p) => math.pow((v - p), 2) }.mean()
+    println("training Mean Squared Error = " + MSE)
+
+    // Save and load model
+    model.save(sc, "target/tmp/scalaLinearRegressionWithSGDModel")
+    val sameModel = LinearRegressionModel.load(sc, "target/tmp/scalaLinearRegressionWithSGDModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala
new file mode 100644
index 0000000000..632a2d537e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithLBFGS}
+import org.apache.spark.mllib.evaluation.MulticlassMetrics
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object LogisticRegressionWithLBFGSExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("LogisticRegressionWithLBFGSExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load training data in LIBSVM format.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    // Split data into training (60%) and test (40%).
+    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    val training = splits(0).cache()
+    val test = splits(1)
+
+    // Run training algorithm to build the model
+    val model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(10)
+      .run(training)
+
+    // Compute raw scores on the test set.
+    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
+      val prediction = model.predict(features)
+      (prediction, label)
+    }
+
+    // Get evaluation metrics.
+    val metrics = new MulticlassMetrics(predictionAndLabels)
+    val precision = metrics.precision
+    println("Precision = " + precision)
+
+    // Save and load model
+    model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel")
+    val sameModel = LogisticRegressionModel.load(sc,
+      "target/tmp/scalaLogisticRegressionWithLBFGSModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala
new file mode 100644
index 0000000000..b73fe9b2b3
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object SVMWithSGDExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("SVMWithSGDExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load training data in LIBSVM format.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    // Split data into training (60%) and test (40%).
+    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    val training = splits(0).cache()
+    val test = splits(1)
+
+    // Run training algorithm to build the model
+    val numIterations = 100
+    val model = SVMWithSGD.train(training, numIterations)
+
+    // Clear the default threshold.
+    model.clearThreshold()
+
+    // Compute raw scores on the test set.
+    val scoreAndLabels = test.map { point =>
+      val score = model.predict(point.features)
+      (score, point.label)
+    }
+
+    // Get evaluation metrics.
+    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
+    val auROC = metrics.areaUnderROC()
+
+    println("Area under ROC = " + auROC)
+
+    // Save and load model
+    model.save(sc, "target/tmp/scalaSVMWithSGDModel")
+    val sameModel = SVMModel.load(sc, "target/tmp/scalaSVMWithSGDModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala
new file mode 100644
index 0000000000..0a1cd2d62d
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD
+// $example off$
+import org.apache.spark.streaming._
+
+object StreamingLinearRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+    if (args.length != 2) {
+      System.err.println("Usage: StreamingLinearRegressionExample <trainingDir> <testDir>")
+      System.exit(1)
+    }
+
+    val conf = new SparkConf().setAppName("StreamingLinearRegressionExample")
+    val ssc = new StreamingContext(conf, Seconds(1))
+
+    // $example on$
+    val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse).cache()
+    val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse)
+
+    val numFeatures = 3
+    val model = new StreamingLinearRegressionWithSGD()
+      .setInitialWeights(Vectors.zeros(numFeatures))
+
+    model.trainOn(trainingData)
+    model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print()
+
+    ssc.start()
+    ssc.awaitTermination()
+    // $example off$
+
+    ssc.stop()
+  }
+}
+// scalastyle:on println
author	Dongjoon Hyun <dongjoon@apache.org>	2016-02-26 08:31:55 -0800
committer	Xiangrui Meng <meng@databricks.com>	2016-02-26 08:31:55 -0800
commit	7af0de076f74e975c9235c88b0f11b22fcbae060 (patch)
tree	0dfcb6c7eb3213c7d37ed203e7f513112bfbba02 /examples/src/main/scala
parent	b33261f91387904c5aaccae40f86922c92a4e09a (diff)
download	spark-7af0de076f74e975c9235c88b0f11b22fcbae060.tar.gz spark-7af0de076f74e975c9235c88b0f11b22fcbae060.tar.bz2 spark-7af0de076f74e975c9235c88b0f11b22fcbae060.zip