diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-09-08 21:47:24 -0700 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-09-08 21:47:24 -0700 |
commit | 7a5c4b647bdd12d2a6d6285d0349680f1a848b01 (patch) | |
tree | 9f3fbe6fc947278c84c3f8af99ba97f030035e23 /docs | |
parent | 7d3204b0568e802ba15eb482c1af4c37ebf07022 (diff) | |
download | spark-7a5c4b647bdd12d2a6d6285d0349680f1a848b01.tar.gz spark-7a5c4b647bdd12d2a6d6285d0349680f1a848b01.tar.bz2 spark-7a5c4b647bdd12d2a6d6285d0349680f1a848b01.zip |
Small tweaks to MLlib docs
Diffstat (limited to 'docs')
-rw-r--r-- | docs/mllib-guide.md | 18 |
1 files changed, 8 insertions, 10 deletions
```diff
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 1a629994cc..1583ce4a58 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -4,7 +4,7 @@ title: Machine Learning Library (MLlib)
 ---
 
 MLlib is a Spark implementation of some common machine learning (ML)
-functionality, as well associated unit tests and data generators. MLlib
+functionality, as well associated tests and data generators. MLlib
 currently supports four common types of machine learning problem settings,
 namely, binary classification, regression, clustering and collaborative
 filtering, as well as an underlying gradient descent optimization primitive.
@@ -44,22 +44,20 @@ import org.apache.spark.mllib.regression.LabeledPoint
 
 // Load and parse the data file
 val data = sc.textFile("mllib/data/sample_svm_data.txt")
-val parsedData = data.map(line => {
+val parsedData = data.map { line =>
   val parts = line.split(' ')
   LabeledPoint(parts(0).toDouble, parts.tail.map(x => x.toDouble).toArray)
-})
+}
 
 // Run training algorithm
 val numIterations = 20
-val model = SVMWithSGD.train(
-  parsedData,
-  numIterations)
+val model = SVMWithSGD.train(parsedData, numIterations)
 
 // Evaluate model on training examples and compute training error
-val labelAndPreds = parsedData.map(r => {
-  val prediction = model.predict(r.features)
-  (r.label, prediction)
-})
+val labelAndPreds = parsedData.map { point =>
+  val prediction = model.predict(point.features)
+  (point.label, prediction)
+}
 val trainErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / parsedData.count
 println("trainError = " + trainErr)
 {% endhighlight %}
```