diff options
author | Xiangrui Meng <meng@databricks.com> | 2014-08-16 15:13:34 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2014-08-16 15:13:34 -0700 |
commit | 7e70708a99949549adde00cb6246a9582bbc4929 (patch) | |
tree | 2fe8186f0152c80892578fcda8e6d74b5bc5fcae /examples | |
parent | 76fa0eaf515fd6771cdd69422b1259485debcae5 (diff) | |
download | spark-7e70708a99949549adde00cb6246a9582bbc4929.tar.gz spark-7e70708a99949549adde00cb6246a9582bbc4929.tar.bz2 spark-7e70708a99949549adde00cb6246a9582bbc4929.zip |
[SPARK-3048][MLLIB] add LabeledPoint.parse and remove loadStreamingLabeledPoints
Move `parse()` from `LabeledPointParser` to `LabeledPoint` and make it public. This breaks binary compatibility only when a user uses synthesized methods like `tupled` and `curried`, which is rare.
`LabeledPoint.parse` is more consistent with `Vectors.parse`, which is why `LabeledPointParser` is not preferred.
freeman-lab tdas
Author: Xiangrui Meng <meng@databricks.com>
Closes #1952 from mengxr/labelparser and squashes the following commits:
c818fb2 [Xiangrui Meng] merge master
ce20e6f [Xiangrui Meng] update mima excludes
b386b8d [Xiangrui Meng] fix tests
2436b3d [Xiangrui Meng] add parse() to LabeledPoint
Diffstat (limited to 'examples')
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala | 7 |
1 files changed, 3 insertions, 4 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala index 1fd37edfa7..0e992fa996 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala @@ -18,8 +18,7 @@ package org.apache.spark.examples.mllib import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.mllib.util.MLUtils -import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD +import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD} import org.apache.spark.SparkConf import org.apache.spark.streaming.{Seconds, StreamingContext} @@ -56,8 +55,8 @@ object StreamingLinearRegression { val conf = new SparkConf().setMaster("local").setAppName("StreamingLinearRegression") val ssc = new StreamingContext(conf, Seconds(args(2).toLong)) - val trainingData = MLUtils.loadStreamingLabeledPoints(ssc, args(0)) - val testData = MLUtils.loadStreamingLabeledPoints(ssc, args(1)) + val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse) + val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse) val model = new StreamingLinearRegressionWithSGD() .setInitialWeights(Vectors.dense(Array.fill[Double](args(3).toInt)(0))) |