aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- docs/mllib-guide.md 18
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala 13
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala 4
3 files changed, 17 insertions, 18 deletions
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 1a629994cc..1583ce4a58 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -4,7 +4,7 @@ title: Machine Learning Library (MLlib)
---
MLlib is a Spark implementation of some common machine learning (ML)
-functionality, as well associated unit tests and data generators. MLlib
+functionality, as well as associated tests and data generators. MLlib
currently supports four common types of machine learning problem settings,
namely, binary classification, regression, clustering and collaborative
filtering, as well as an underlying gradient descent optimization primitive.
@@ -44,22 +44,20 @@ import org.apache.spark.mllib.regression.LabeledPoint
// Load and parse the data file
val data = sc.textFile("mllib/data/sample_svm_data.txt")
-val parsedData = data.map(line => {
+val parsedData = data.map { line =>
val parts = line.split(' ')
LabeledPoint(parts(0).toDouble, parts.tail.map(x => x.toDouble).toArray)
-})
+}
// Run training algorithm
val numIterations = 20
-val model = SVMWithSGD.train(
- parsedData,
- numIterations)
+val model = SVMWithSGD.train(parsedData, numIterations)
// Evaluate model on training examples and compute training error
-val labelAndPreds = parsedData.map(r => {
- val prediction = model.predict(r.features)
- (r.label, prediction)
-})
+val labelAndPreds = parsedData.map { point =>
+ val prediction = model.predict(point.features)
+ (point.label, prediction)
+}
val trainErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / parsedData.count
println("trainError = " + trainErr)
{% endhighlight %}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 3511e24bce..3b8f8550d0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -29,7 +29,7 @@ import org.apache.spark.mllib.util.DataValidators
import org.jblas.DoubleMatrix
/**
- * Model built using SVM.
+ * Model for Support Vector Machines (SVMs).
*
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
@@ -48,8 +48,8 @@ class SVMModel(
}
/**
- * Train an SVM using Stochastic Gradient Descent.
- * NOTE: Labels used in SVM should be {0, 1}
+ * Train a Support Vector Machine (SVM) using Stochastic Gradient Descent.
+ * NOTE: Labels used in SVM should be {0, 1}.
*/
class SVMWithSGD private (
var stepSize: Double,
@@ -79,7 +79,7 @@ class SVMWithSGD private (
}
/**
- * Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}
+ * Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}.
*/
object SVMWithSGD {
@@ -88,14 +88,15 @@ object SVMWithSGD {
* of iterations of gradient descent using the specified step size. Each iteration uses
* `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
* gradient descent are initialized using the initial weights provided.
- * NOTE: Labels used in SVM should be {0, 1}
+ *
+ * NOTE: Labels used in SVM should be {0, 1}.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
- * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
*/
def train(
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index ae95ea24fc..597d55e0bb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -43,7 +43,7 @@ class LinearRegressionModel(
}
/**
- * Train a regression model with no regularization using Stochastic Gradient Descent.
+ * Train a linear regression model with no regularization using Stochastic Gradient Descent.
*/
class LinearRegressionWithSGD private (
var stepSize: Double,
@@ -83,7 +83,7 @@ object LinearRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param miniBatchFraction Fraction of data to be used per iteration.
- * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
*/
def train(