aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main/scala/spark/mllib/classification/SVM.scala
diff options
context:
space:
mode:
Diffstat (limited to 'mllib/src/main/scala/spark/mllib/classification/SVM.scala')
-rw-r--r--mllib/src/main/scala/spark/mllib/classification/SVM.scala21
1 files changed, 16 insertions, 5 deletions
diff --git a/mllib/src/main/scala/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/spark/mllib/classification/SVM.scala
index f799cb2829..d2b50f4987 100644
--- a/mllib/src/main/scala/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/spark/mllib/classification/SVM.scala
@@ -18,10 +18,12 @@
package spark.mllib.classification
import scala.math.signum
+
import spark.{Logging, RDD, SparkContext}
import spark.mllib.optimization._
import spark.mllib.regression._
import spark.mllib.util.MLUtils
+import spark.mllib.util.DataValidators
import org.jblas.DoubleMatrix
@@ -45,6 +47,7 @@ class SVMModel(
/**
* Train an SVM using Stochastic Gradient Descent.
+ * NOTE: Labels used in SVM should be in {0, 1}.
*/
class SVMWithSGD private (
var stepSize: Double,
@@ -56,10 +59,14 @@ class SVMWithSGD private (
val gradient = new HingeGradient()
val updater = new SquaredL2Updater()
- val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize)
- .setNumIterations(numIterations)
- .setRegParam(regParam)
- .setMiniBatchFraction(miniBatchFraction)
+ override val optimizer = new GradientDescent(gradient, updater)
+ .setStepSize(stepSize)
+ .setNumIterations(numIterations)
+ .setRegParam(regParam)
+ .setMiniBatchFraction(miniBatchFraction)
+
+ override val validateFuncs = List(DataValidators.classificationLabels)
+
/**
* Construct a SVM object with default parameters
*/
@@ -71,7 +78,7 @@ class SVMWithSGD private (
}
/**
- * Top-level methods for calling SVM.
+ * Top-level methods for calling SVM. NOTE: Labels used in SVM should be in {0, 1}.
*/
object SVMWithSGD {
@@ -80,6 +87,7 @@ object SVMWithSGD {
* of iterations of gradient descent using the specified step size. Each iteration uses
* `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
* gradient descent are initialized using the initial weights provided.
+ * NOTE: Labels used in SVM should be in {0, 1}.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
@@ -106,6 +114,7 @@ object SVMWithSGD {
* Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. Each iteration uses
* `miniBatchFraction` fraction of the data to calculate the gradient.
+ * NOTE: Labels used in SVM should be in {0, 1}.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
@@ -128,6 +137,7 @@ object SVMWithSGD {
* Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using the specified step size. We use the entire data set to
* update the gradient in each iteration.
+ * NOTE: Labels used in SVM should be in {0, 1}.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -149,6 +159,7 @@ object SVMWithSGD {
* Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
* of iterations of gradient descent using a step size of 1.0. We use the entire data set to
* update the gradient in each iteration.
+ * NOTE: Labels used in SVM should be in {0, 1}.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.