diff options
Diffstat (limited to 'examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala')
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala | 73 |
1 files changed, 0 insertions, 73 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala deleted file mode 100644 index e5592966f1..0000000000 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// scalastyle:off println -package org.apache.spark.examples.mllib - -import org.apache.spark.SparkConf -import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD} -import org.apache.spark.streaming.{Seconds, StreamingContext} - -/** - * Train a linear regression model on one stream of data and make predictions - * on another stream, where the data streams arrive as text files - * into two different directories. - * - * The rows of the text files must be labeled data points in the form - * `(y,[x1,x2,x3,...,xn])` - * Where n is the number of features. n must be the same for train and test. - * - * Usage: StreamingLinearRegression <trainingDir> <testDir> <batchDuration> <numFeatures> - * - * To run on your local machine using the two directories `trainingDir` and `testDir`, - * with updates every 5 seconds, and 2 features per data point, call: - * $ bin/run-example mllib.StreamingLinearRegression trainingDir testDir 5 2 - * - * As you add text files to `trainingDir` the model will continuously update. - * Anytime you add text files to `testDir`, you'll see predictions from the current model. - * - */ -object StreamingLinearRegression { - - def main(args: Array[String]) { - - if (args.length != 4) { - System.err.println( - "Usage: StreamingLinearRegression <trainingDir> <testDir> <batchDuration> <numFeatures>") - System.exit(1) - } - - val conf = new SparkConf().setMaster("local").setAppName("StreamingLinearRegression") - val ssc = new StreamingContext(conf, Seconds(args(2).toLong)) - - val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse) - val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse) - - val model = new StreamingLinearRegressionWithSGD() - .setInitialWeights(Vectors.zeros(args(3).toInt)) - - model.trainOn(trainingData) - model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print() - - ssc.start() - ssc.awaitTermination() - - } - -} -// scalastyle:on println |