about summary refs log tree commit diff
path: root/mllib/src/test
diff options
context:
space:
mode:
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- mllib/src/test/java/spark/mllib/regression/JavaLassoSuite.java 11
-rw-r--r-- mllib/src/test/java/spark/mllib/regression/JavaLinearRegressionSuite.java 9
-rw-r--r-- mllib/src/test/java/spark/mllib/regression/JavaRidgeRegressionSuite.java 9
-rw-r--r-- mllib/src/test/scala/spark/mllib/regression/LassoSuite.scala 39
-rw-r--r-- mllib/src/test/scala/spark/mllib/regression/LinearRegressionSuite.scala 38
-rw-r--r-- mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala 38
6 files changed, 27 insertions, 117 deletions
diff --git a/mllib/src/test/java/spark/mllib/regression/JavaLassoSuite.java b/mllib/src/test/java/spark/mllib/regression/JavaLassoSuite.java
index e26d7b385c..8d692c2d0d 100644
--- a/mllib/src/test/java/spark/mllib/regression/JavaLassoSuite.java
+++ b/mllib/src/test/java/spark/mllib/regression/JavaLassoSuite.java
@@ -27,6 +27,7 @@ import org.junit.Test;
import spark.api.java.JavaRDD;
import spark.api.java.JavaSparkContext;
+import spark.mllib.util.LinearDataGenerator;
public class JavaLassoSuite implements Serializable {
private transient JavaSparkContext sc;
@@ -61,10 +62,10 @@ public class JavaLassoSuite implements Serializable {
double A = 2.0;
double[] weights = {-1.5, 1.0e-2};
- JavaRDD<LabeledPoint> testRDD = sc.parallelize(LassoSuite.generateLassoInputAsList(A,
- weights, nPoints, 42), 2).cache();
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
+ weights, nPoints, 42), 2).cache();
List<LabeledPoint> validationData =
- LassoSuite.generateLassoInputAsList(A, weights, nPoints, 17);
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17);
LassoWithSGD svmSGDImpl = new LassoWithSGD();
svmSGDImpl.optimizer().setStepSize(1.0)
@@ -82,10 +83,10 @@ public class JavaLassoSuite implements Serializable {
double A = 2.0;
double[] weights = {-1.5, 1.0e-2};
- JavaRDD<LabeledPoint> testRDD = sc.parallelize(LassoSuite.generateLassoInputAsList(A,
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
weights, nPoints, 42), 2).cache();
List<LabeledPoint> validationData =
- LassoSuite.generateLassoInputAsList(A, weights, nPoints, 17);
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17);
LassoModel model = LassoWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0);
diff --git a/mllib/src/test/java/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/spark/mllib/regression/JavaLinearRegressionSuite.java
index 14d3d4ef39..d2d8a62980 100644
--- a/mllib/src/test/java/spark/mllib/regression/JavaLinearRegressionSuite.java
+++ b/mllib/src/test/java/spark/mllib/regression/JavaLinearRegressionSuite.java
@@ -27,6 +27,7 @@ import org.junit.Test;
import spark.api.java.JavaRDD;
import spark.api.java.JavaSparkContext;
+import spark.mllib.util.LinearDataGenerator;
public class JavaLinearRegressionSuite implements Serializable {
private transient JavaSparkContext sc;
@@ -61,10 +62,10 @@ public class JavaLinearRegressionSuite implements Serializable {
double A = 2.0;
double[] weights = {-1.5, 1.0e-2};
- JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearRegressionSuite.generateLinearRegressionInputAsList(A,
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
weights, nPoints, 42), 2).cache();
List<LabeledPoint> validationData =
- LinearRegressionSuite.generateLinearRegressionInputAsList(A, weights, nPoints, 17);
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17);
LinearRegressionWithSGD svmSGDImpl = new LinearRegressionWithSGD();
svmSGDImpl.optimizer().setStepSize(1.0)
@@ -82,10 +83,10 @@ public class JavaLinearRegressionSuite implements Serializable {
double A = 2.0;
double[] weights = {-1.5, 1.0e-2};
- JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearRegressionSuite.generateLinearRegressionInputAsList(A,
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
weights, nPoints, 42), 2).cache();
List<LabeledPoint> validationData =
- LinearRegressionSuite.generateLinearRegressionInputAsList(A, weights, nPoints, 17);
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17);
LinearRegressionModel model = LinearRegressionWithSGD.train(testRDD.rdd(), 100, 1.0, 1.0);
diff --git a/mllib/src/test/java/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/spark/mllib/regression/JavaRidgeRegressionSuite.java
index 4f379b51d5..72ab875985 100644
--- a/mllib/src/test/java/spark/mllib/regression/JavaRidgeRegressionSuite.java
+++ b/mllib/src/test/java/spark/mllib/regression/JavaRidgeRegressionSuite.java
@@ -27,6 +27,7 @@ import org.junit.Test;
import spark.api.java.JavaRDD;
import spark.api.java.JavaSparkContext;
+import spark.mllib.util.LinearDataGenerator;
public class JavaRidgeRegressionSuite implements Serializable {
private transient JavaSparkContext sc;
@@ -61,10 +62,10 @@ public class JavaRidgeRegressionSuite implements Serializable {
double A = 2.0;
double[] weights = {-1.5, 1.0e-2};
- JavaRDD<LabeledPoint> testRDD = sc.parallelize(RidgeRegressionSuite.generateRidgeRegressionInputAsList(A,
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
weights, nPoints, 42), 2).cache();
List<LabeledPoint> validationData =
- RidgeRegressionSuite.generateRidgeRegressionInputAsList(A, weights, nPoints, 17);
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17);
RidgeRegressionWithSGD svmSGDImpl = new RidgeRegressionWithSGD();
svmSGDImpl.optimizer().setStepSize(1.0)
@@ -82,10 +83,10 @@ public class JavaRidgeRegressionSuite implements Serializable {
double A = 2.0;
double[] weights = {-1.5, 1.0e-2};
- JavaRDD<LabeledPoint> testRDD = sc.parallelize(RidgeRegressionSuite.generateRidgeRegressionInputAsList(A,
+ JavaRDD<LabeledPoint> testRDD = sc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
weights, nPoints, 42), 2).cache();
List<LabeledPoint> validationData =
- RidgeRegressionSuite.generateRidgeRegressionInputAsList(A, weights, nPoints, 17);
+ LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17);
RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0);
diff --git a/mllib/src/test/scala/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/spark/mllib/regression/LassoSuite.scala
index 55a738f1e4..622dbbab7f 100644
--- a/mllib/src/test/scala/spark/mllib/regression/LassoSuite.scala
+++ b/mllib/src/test/scala/spark/mllib/regression/LassoSuite.scala
@@ -24,37 +24,8 @@ import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
import spark.SparkContext
+import spark.mllib.util.LinearDataGenerator
-import org.jblas.DoubleMatrix
-
-object LassoSuite {
-
- def generateLassoInputAsList(
- intercept: Double,
- weights: Array[Double],
- nPoints: Int,
- seed: Int): java.util.List[LabeledPoint] = {
- seqAsJavaList(generateLassoInput(intercept, weights, nPoints, seed))
- }
-
-
- // Generate noisy input of the form Y = x.dot(weights) + intercept + noise
- def generateLassoInput(
- intercept: Double,
- weights: Array[Double],
- nPoints: Int,
- seed: Int): Seq[LabeledPoint] = {
- val rnd = new Random(seed)
- val weightsMat = new DoubleMatrix(1, weights.length, weights:_*)
- val x = Array.fill[Array[Double]](nPoints)(
- Array.fill[Double](weights.length)(rnd.nextGaussian()))
- val y = x.map(xi =>
- (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + 0.1 * rnd.nextGaussian()
- )
- y.zip(x).map(p => LabeledPoint(p._1, p._2))
- }
-
-}
class LassoSuite extends FunSuite with BeforeAndAfterAll {
@transient private var sc: SparkContext = _
@@ -85,7 +56,7 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
val B = -1.5
val C = 1.0e-2
- val testData = LassoSuite.generateLassoInput(A, Array[Double](B,C), nPoints, 42)
+ val testData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 42)
val testRDD = sc.parallelize(testData, 2)
testRDD.cache()
@@ -101,7 +72,7 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
assert(weight1 >= -1.0e-3 && weight1 <= 1.0e-3, weight1 + " not in [-0.001, 0.001]")
- val validationData = LassoSuite.generateLassoInput(A, Array[Double](B,C), nPoints, 17)
+ val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17)
val validationRDD = sc.parallelize(validationData, 2)
// Test prediction on RDD.
@@ -118,7 +89,7 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
val B = -1.5
val C = 1.0e-2
- val testData = LassoSuite.generateLassoInput(A, Array[Double](B,C), nPoints, 42)
+ val testData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 42)
val initialB = -1.0
val initialC = -1.0
@@ -138,7 +109,7 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
assert(weight1 >= -1.0e-3 && weight1 <= 1.0e-3, weight1 + " not in [-0.001, 0.001]")
- val validationData = LassoSuite.generateLassoInput(A, Array[Double](B,C), nPoints, 17)
+ val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17)
val validationRDD = sc.parallelize(validationData,2)
// Test prediction on RDD.
diff --git a/mllib/src/test/scala/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/spark/mllib/regression/LinearRegressionSuite.scala
index c794c1cac5..3d22b7d385 100644
--- a/mllib/src/test/scala/spark/mllib/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/spark/mllib/regression/LinearRegressionSuite.scala
@@ -17,46 +17,12 @@
package spark.mllib.regression
-import scala.collection.JavaConversions._
-import scala.util.Random
-
import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
import spark.SparkContext
import spark.SparkContext._
-import spark.mllib.util.LinearRegressionDataGenerator
-import spark.mllib.regression.LabeledPoint
-import org.jblas.DoubleMatrix
-
-object LinearRegressionSuite {
-
- def generateLinearRegressionInputAsList(
- intercept: Double,
- weights: Array[Double],
- nPoints: Int,
- seed: Int): java.util.List[LabeledPoint] = {
- seqAsJavaList(generateLinearRegressionInput(intercept, weights, nPoints, seed))
- }
-
-
- // Generate noisy input of the form Y = x.dot(weights) + intercept + noise
- def generateLinearRegressionInput(
- intercept: Double,
- weights: Array[Double],
- nPoints: Int,
- seed: Int): Seq[LabeledPoint] = {
- val rnd = new Random(seed)
- val weightsMat = new DoubleMatrix(1, weights.length, weights:_*)
- val x = Array.fill[Array[Double]](nPoints)(
- Array.fill[Double](weights.length)(rnd.nextGaussian()))
- val y = x.map(xi =>
- (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + 0.1 * rnd.nextGaussian()
- )
- y.zip(x).map(p => LabeledPoint(p._1, p._2))
- }
-
-}
+import spark.mllib.util.LinearDataGenerator
class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
@transient private var sc: SparkContext = _
@@ -73,7 +39,7 @@ class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
// Test if we can correctly learn Y = 3 + 10*X1 + 10*X2 when
// X1 and X2 are collinear.
test("multi-collinear variables") {
- val testRDD = LinearRegressionDataGenerator.generateLinearRDD(sc, 100, 2, 0.0, intercept=3.0).cache()
+ val testRDD = LinearDataGenerator.generateLinearRDD(sc, 100, 2, 0.0, Array(10.0, 10.0), intercept=3.0).cache()
val linReg = new LinearRegressionWithSGD()
linReg.optimizer.setNumIterations(1000).setStepSize(1.0)
diff --git a/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala
index aaac083ad9..0237ccdf87 100644
--- a/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -25,37 +25,7 @@ import org.scalatest.FunSuite
import spark.SparkContext
import spark.SparkContext._
-import spark.mllib.util.RidgeRegressionDataGenerator
-import org.jblas.DoubleMatrix
-
-object RidgeRegressionSuite {
-
- def generateRidgeRegressionInputAsList(
- intercept: Double,
- weights: Array[Double],
- nPoints: Int,
- seed: Int): java.util.List[LabeledPoint] = {
- seqAsJavaList(generateRidgeRegressionInput(intercept, weights, nPoints, seed))
- }
-
-
- // Generate noisy input of the form Y = x.dot(weights) + intercept + noise
- def generateRidgeRegressionInput(
- intercept: Double,
- weights: Array[Double],
- nPoints: Int,
- seed: Int): Seq[LabeledPoint] = {
- val rnd = new Random(seed)
- val weightsMat = new DoubleMatrix(1, weights.length, weights:_*)
- val x = Array.fill[Array[Double]](nPoints)(
- Array.fill[Double](weights.length)(rnd.nextGaussian()))
- val y = x.map(xi =>
- (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + 0.1 * rnd.nextGaussian()
- )
- y.zip(x).map(p => LabeledPoint(p._1, p._2))
- }
-
-}
+import spark.mllib.util.LinearDataGenerator
class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
@@ -73,7 +43,7 @@ class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
// Test if we can correctly learn Y = 3 + 10*X1 + 10*X2 when
// X1 and X2 are collinear.
test("multi-collinear variables") {
- val testRDD = RidgeRegressionDataGenerator.generateRidgeRDD(sc, 100, 2, 0.0, intercept=3.0).cache()
+ val testRDD = LinearDataGenerator.generateLinearRDD(sc, 100, 2, 0.0, Array(10.0, 10.0), intercept=3.0).cache()
val ridgeReg = new RidgeRegressionWithSGD()
ridgeReg.optimizer.setNumIterations(1000).setRegParam(0.0).setStepSize(1.0)
@@ -86,7 +56,7 @@ class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
}
test("multi-collinear variables with regularization") {
- val testRDD = RidgeRegressionDataGenerator.generateRidgeRDD(sc, 100, 2, 0.0, intercept=3.0).cache()
+ val testRDD = LinearDataGenerator.generateLinearRDD(sc, 100, 2, 0.0, Array(10.0, 10.0), intercept=3.0).cache()
val ridgeReg = new RidgeRegressionWithSGD()
ridgeReg.optimizer.setNumIterations(1000).setRegParam(1.0).setStepSize(1.0)
@@ -94,7 +64,7 @@ class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
assert(model.intercept <= 5.0)
assert(model.weights.length === 2)
- assert(model.weights(0) <= 3.0)
+ assert(model.weights(0) <= 4.0)
assert(model.weights(1) <= 3.0)
}
}