author     Matei Zaharia <matei@eecs.berkeley.edu>  2013-09-08 21:47:24 -0700
committer  Matei Zaharia <matei@eecs.berkeley.edu>  2013-09-08 21:47:24 -0700
commit     7a5c4b647bdd12d2a6d6285d0349680f1a848b01 (patch)
tree       9f3fbe6fc947278c84c3f8af99ba97f030035e23 /docs
parent     7d3204b0568e802ba15eb482c1af4c37ebf07022 (diff)
Small tweaks to MLlib docs
Diffstat (limited to 'docs')
-rw-r--r--  docs/mllib-guide.md | 18
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 1a629994cc..1583ce4a58 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -4,7 +4,7 @@ title: Machine Learning Library (MLlib)
---
MLlib is a Spark implementation of some common machine learning (ML)
-functionality, as well associated unit tests and data generators. MLlib
+functionality, as well as associated tests and data generators. MLlib
currently supports four common types of machine learning problem settings,
namely, binary classification, regression, clustering and collaborative
filtering, as well as an underlying gradient descent optimization primitive.
@@ -44,22 +44,20 @@ import org.apache.spark.mllib.regression.LabeledPoint
// Load and parse the data file
val data = sc.textFile("mllib/data/sample_svm_data.txt")
-val parsedData = data.map(line => {
+val parsedData = data.map { line =>
val parts = line.split(' ')
LabeledPoint(parts(0).toDouble, parts.tail.map(x => x.toDouble).toArray)
-})
+}
// Run training algorithm
val numIterations = 20
-val model = SVMWithSGD.train(
- parsedData,
- numIterations)
+val model = SVMWithSGD.train(parsedData, numIterations)
// Evaluate model on training examples and compute training error
-val labelAndPreds = parsedData.map(r => {
- val prediction = model.predict(r.features)
- (r.label, prediction)
-})
+val labelAndPreds = parsedData.map { point =>
+ val prediction = model.predict(point.features)
+ (point.label, prediction)
+}
val trainErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / parsedData.count
println("trainError = " + trainErr)
{% endhighlight %}
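
For reference, below is how the guide's Scala example reads once this patch is applied. This is a minimal sketch assembled from the hunk above; it assumes the guide's unchanged surrounding lines provide the SVMWithSGD import and an existing SparkContext named `sc`, and it targets the 0.8-era MLlib API in which LabeledPoint takes an Array[Double] of features.

{% highlight scala %}
// Imports assumed from the guide's unchanged lines around this hunk:
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.regression.LabeledPoint

// Load and parse the data file: each line is "label feature1 feature2 ..."
val data = sc.textFile("mllib/data/sample_svm_data.txt")
val parsedData = data.map { line =>
  val parts = line.split(' ')
  LabeledPoint(parts(0).toDouble, parts.tail.map(x => x.toDouble).toArray)
}

// Run training algorithm
val numIterations = 20
val model = SVMWithSGD.train(parsedData, numIterations)

// Evaluate model on training examples and compute training error
val labelAndPreds = parsedData.map { point =>
  val prediction = model.predict(point.features)
  (point.label, prediction)
}
val trainErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / parsedData.count
println("trainError = " + trainErr)
{% endhighlight %}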