path: root/examples/src/main/scala
author     Dongjoon Hyun <dongjoon@apache.org>  2016-02-26 08:31:55 -0800
committer  Xiangrui Meng <meng@databricks.com>  2016-02-26 08:31:55 -0800
commit     7af0de076f74e975c9235c88b0f11b22fcbae060 (patch)
tree       0dfcb6c7eb3213c7d37ed203e7f513112bfbba02 /examples/src/main/scala
parent     b33261f91387904c5aaccae40f86922c92a4e09a (diff)
[SPARK-11381][DOCS] Replace example code in mllib-linear-methods.md using include_example
## What changes were proposed in this pull request?

This PR replaces the example code in `mllib-linear-methods.md` using `include_example` by doing the following:

* Extracts the example code (Scala, Java, Python) as files in the `example` module.
* Merges some dialog-style examples into a single file.
* Hides redundant code in the HTML for consistency with other docs.

## How was this patch tested?

Manual test. This PR can be tested by generating the documentation with `SKIP_API=1 jekyll build`.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #11320 from dongjoon-hyun/SPARK-11381.
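As a rough illustration of the mechanism (the doc-side changes are not part of this diff, so the exact path and surrounding markup below are assumptions, not taken from the commit): the markdown page references the extracted Scala file with a Jekyll `include_example` tag, and only the region between the `// $example on$` and `// $example off$` markers is rendered into the page, e.g.

```
<div data-lang="scala" markdown="1">
{% include_example scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala %}
</div>
```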
Diffstat (limited to 'examples/src/main/scala')
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala    64
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala  69
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala                   70
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala    58
4 files changed, 261 insertions, 0 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala
new file mode 100644
index 0000000000..669868787e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.LinearRegressionModel
+import org.apache.spark.mllib.regression.LinearRegressionWithSGD
+// $example off$
+
+object LinearRegressionWithSGDExample {
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("LinearRegressionWithSGDExample")
+ val sc = new SparkContext(conf)
+
+ // $example on$
+ // Load and parse the data
+ val data = sc.textFile("data/mllib/ridge-data/lpsa.data")
+ val parsedData = data.map { line =>
+ val parts = line.split(',')
+ LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
+ }.cache()
+
+ // Building the model
+ val numIterations = 100
+ val stepSize = 0.00000001
+ val model = LinearRegressionWithSGD.train(parsedData, numIterations, stepSize)
+
+ // Evaluate model on training examples and compute training error
+ val valuesAndPreds = parsedData.map { point =>
+ val prediction = model.predict(point.features)
+ (point.label, prediction)
+ }
+ val MSE = valuesAndPreds.map { case (v, p) => math.pow(v - p, 2) }.mean()
+ println("training Mean Squared Error = " + MSE)
+
+ // Save and load model
+ model.save(sc, "target/tmp/scalaLinearRegressionWithSGDModel")
+ val sameModel = LinearRegressionModel.load(sc, "target/tmp/scalaLinearRegressionWithSGDModel")
+ // $example off$
+
+ sc.stop()
+ }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala
new file mode 100644
index 0000000000..632a2d537e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithLBFGS}
+import org.apache.spark.mllib.evaluation.MulticlassMetrics
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object LogisticRegressionWithLBFGSExample {
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("LogisticRegressionWithLBFGSExample")
+ val sc = new SparkContext(conf)
+
+ // $example on$
+ // Load training data in LIBSVM format.
+ val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+ // Split data into training (60%) and test (40%).
+ val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+ val training = splits(0).cache()
+ val test = splits(1)
+
+ // Run training algorithm to build the model
+ val model = new LogisticRegressionWithLBFGS()
+ .setNumClasses(10)
+ .run(training)
+
+ // Compute raw scores on the test set.
+ val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
+ val prediction = model.predict(features)
+ (prediction, label)
+ }
+
+ // Get evaluation metrics.
+ val metrics = new MulticlassMetrics(predictionAndLabels)
+ val precision = metrics.precision
+ println("Precision = " + precision)
+
+ // Save and load model
+ model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel")
+ val sameModel = LogisticRegressionModel.load(sc,
+ "target/tmp/scalaLogisticRegressionWithLBFGSModel")
+ // $example off$
+
+ sc.stop()
+ }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala
new file mode 100644
index 0000000000..b73fe9b2b3
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object SVMWithSGDExample {
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf().setAppName("SVMWithSGDExample")
+ val sc = new SparkContext(conf)
+
+ // $example on$
+ // Load training data in LIBSVM format.
+ val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+ // Split data into training (60%) and test (40%).
+ val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+ val training = splits(0).cache()
+ val test = splits(1)
+
+ // Run training algorithm to build the model
+ val numIterations = 100
+ val model = SVMWithSGD.train(training, numIterations)
+
+ // Clear the default threshold.
+ model.clearThreshold()
+
+ // Compute raw scores on the test set.
+ val scoreAndLabels = test.map { point =>
+ val score = model.predict(point.features)
+ (score, point.label)
+ }
+
+ // Get evaluation metrics.
+ val metrics = new BinaryClassificationMetrics(scoreAndLabels)
+ val auROC = metrics.areaUnderROC()
+
+ println("Area under ROC = " + auROC)
+
+ // Save and load model
+ model.save(sc, "target/tmp/scalaSVMWithSGDModel")
+ val sameModel = SVMModel.load(sc, "target/tmp/scalaSVMWithSGDModel")
+ // $example off$
+
+ sc.stop()
+ }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala
new file mode 100644
index 0000000000..0a1cd2d62d
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD
+// $example off$
+import org.apache.spark.streaming._
+
+object StreamingLinearRegressionExample {
+
+ def main(args: Array[String]): Unit = {
+ if (args.length != 2) {
+ System.err.println("Usage: StreamingLinearRegressionExample <trainingDir> <testDir>")
+ System.exit(1)
+ }
+
+ val conf = new SparkConf().setAppName("StreamingLinearRegressionExample")
+ val ssc = new StreamingContext(conf, Seconds(1))
+
+ // $example on$
+ val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse).cache()
+ val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse)
+
+ val numFeatures = 3
+ val model = new StreamingLinearRegressionWithSGD()
+ .setInitialWeights(Vectors.zeros(numFeatures))
+
+ model.trainOn(trainingData)
+ model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print()
+
+ ssc.start()
+ ssc.awaitTermination()
+ // $example off$
+
+ ssc.stop()
+ }
+}
+// scalastyle:on println