aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2014-08-16 15:13:34 -0700
committer: Xiangrui Meng <meng@databricks.com> 2014-08-16 15:13:34 -0700
commit: 7e70708a99949549adde00cb6246a9582bbc4929 (patch)
tree: 2fe8186f0152c80892578fcda8e6d74b5bc5fcae /examples
parent: 76fa0eaf515fd6771cdd69422b1259485debcae5 (diff)
download: spark-7e70708a99949549adde00cb6246a9582bbc4929.tar.gz
spark-7e70708a99949549adde00cb6246a9582bbc4929.tar.bz2
spark-7e70708a99949549adde00cb6246a9582bbc4929.zip
[SPARK-3048][MLLIB] add LabeledPoint.parse and remove loadStreamingLabeledPoints
Move `parse()` from `LabeledPointParser` to `LabeledPoint` and make it public. This breaks binary compatibility only when a user uses synthesized methods like `tupled` and `curried`, which is rare. `LabeledPoint.parse` is more consistent with `Vectors.parse`, which is why `LabeledPointParser` is not preferred.

freeman-lab tdas

Author: Xiangrui Meng <meng@databricks.com>

Closes #1952 from mengxr/labelparser and squashes the following commits:

c818fb2 [Xiangrui Meng] merge master
ce20e6f [Xiangrui Meng] update mima excludes
b386b8d [Xiangrui Meng] fix tests
2436b3d [Xiangrui Meng] add parse() to LabeledPoint
Diffstat (limited to 'examples')
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala 7
1 files changed, 3 insertions, 4 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala
index 1fd37edfa7..0e992fa996 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegression.scala
@@ -18,8 +18,7 @@
package org.apache.spark.examples.mllib
import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD
+import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
@@ -56,8 +55,8 @@ object StreamingLinearRegression {
val conf = new SparkConf().setMaster("local").setAppName("StreamingLinearRegression")
val ssc = new StreamingContext(conf, Seconds(args(2).toLong))
- val trainingData = MLUtils.loadStreamingLabeledPoints(ssc, args(0))
- val testData = MLUtils.loadStreamingLabeledPoints(ssc, args(1))
+ val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse)
+ val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse)
val model = new StreamingLinearRegressionWithSGD()
.setInitialWeights(Vectors.dense(Array.fill[Double](args(3).toInt)(0)))