aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala')
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala 73
1 files changed, 0 insertions, 73 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala
deleted file mode 100644
index e5592966f1..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.mllib
-
-import org.apache.spark.SparkConf
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
-import org.apache.spark.streaming.{Seconds, StreamingContext}
-
-/**
- * Train a linear regression model on one stream of data and make predictions
- * on another stream, where the data streams arrive as text files
- * into two different directories.
- *
- * The rows of the text files must be labeled data points in the form
- * `(y,[x1,x2,x3,...,xn])`
- * Where n is the number of features. n must be the same for train and test.
- *
- * Usage: StreamingLinearRegression <trainingDir> <testDir> <batchDuration> <numFeatures>
- *
- * To run on your local machine using the two directories `trainingDir` and `testDir`,
- * with updates every 5 seconds, and 2 features per data point, call:
- * $ bin/run-example mllib.StreamingLinearRegression trainingDir testDir 5 2
- *
- * As you add text files to `trainingDir` the model will continuously update.
- * Anytime you add text files to `testDir`, you'll see predictions from the current model.
- *
- */
-object StreamingLinearRegression {
-
- def main(args: Array[String]) {
-
- if (args.length != 4) {
- System.err.println(
- "Usage: StreamingLinearRegression <trainingDir> <testDir> <batchDuration> <numFeatures>")
- System.exit(1)
- }
-
- val conf = new SparkConf().setMaster("local").setAppName("StreamingLinearRegression")
- val ssc = new StreamingContext(conf, Seconds(args(2).toLong))
-
- val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse)
- val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse)
-
- val model = new StreamingLinearRegressionWithSGD()
- .setInitialWeights(Vectors.zeros(args(3).toInt))
-
- model.trainOn(trainingData)
- model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print()
-
- ssc.start()
- ssc.awaitTermination()
-
- }
-
-}
-// scalastyle:on println