about summary refs log tree commit diff
diff options
context:
space:
mode:
author Matei Zaharia <matei@eecs.berkeley.edu> 2013-06-21 15:58:01 +0200
committer Matei Zaharia <matei@eecs.berkeley.edu> 2013-07-05 11:13:46 -0700
commit 05be233ce2716fe57cf44433d52734ded29e3506 (patch)
tree abceb6af10e85be1be5faa2e7c4c45067bf8629a
parent 39ed41652beaad63c03540411a51ed82c1126e6d (diff)
download spark-05be233ce2716fe57cf44433d52734ded29e3506.tar.gz
spark-05be233ce2716fe57cf44433d52734ded29e3506.tar.bz2
spark-05be233ce2716fe57cf44433d52734ded29e3506.zip
Removed dependency on Apache Commons Math
-rw-r--r-- ml/src/main/scala/spark/ml/regression/LogisticRegressionGenerator.scala 12
-rw-r--r-- ml/src/main/scala/spark/ml/regression/RidgeRegressionGenerator.scala 12
-rw-r--r-- ml/src/test/scala/spark/ml/regression/LogisticRegressionSuite.scala 11
-rw-r--r-- ml/src/test/scala/spark/ml/regression/RidgeRegressionSuite.scala 16
-rw-r--r-- project/SparkBuild.scala 3
5 files changed, 27 insertions, 27 deletions
diff --git a/ml/src/main/scala/spark/ml/regression/LogisticRegressionGenerator.scala b/ml/src/main/scala/spark/ml/regression/LogisticRegressionGenerator.scala
index 1617eac205..6d37aad047 100644
--- a/ml/src/main/scala/spark/ml/regression/LogisticRegressionGenerator.scala
+++ b/ml/src/main/scala/spark/ml/regression/LogisticRegressionGenerator.scala
@@ -1,11 +1,12 @@
package spark.ml.regression
-import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import scala.util.Random
-import org.apache.commons.math3.distribution.NormalDistribution
import org.jblas.DoubleMatrix
+import spark.{RDD, SparkContext}
+import spark.ml.util.MLUtils
+
object LogisticRegressionGenerator {
def main(args: Array[String]) {
@@ -25,12 +26,11 @@ object LogisticRegressionGenerator {
val sc = new SparkContext(sparkMaster, "LogisticRegressionGenerator")
val data: RDD[(Double, Array[Double])] = sc.parallelize(0 until nexamples, parts).map { idx =>
- val rnd = new NormalDistribution(0, 1)
- rnd.reseedRandomGenerator(42 + idx)
+ val rnd = new Random(42 + idx)
val y = if (idx % 2 == 0) 0 else 1
val x = Array.fill[Double](nfeatures) {
- rnd.sample() + (y * eps)
+ rnd.nextGaussian() + (y * eps)
}
(y, x)
}
diff --git a/ml/src/main/scala/spark/ml/regression/RidgeRegressionGenerator.scala b/ml/src/main/scala/spark/ml/regression/RidgeRegressionGenerator.scala
index ac7f1e7320..75854fe1de 100644
--- a/ml/src/main/scala/spark/ml/regression/RidgeRegressionGenerator.scala
+++ b/ml/src/main/scala/spark/ml/regression/RidgeRegressionGenerator.scala
@@ -1,11 +1,12 @@
package spark.ml.regression
-import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import scala.util.Random
-import org.apache.commons.math3.distribution.NormalDistribution
import org.jblas.DoubleMatrix
+import spark.{RDD, SparkContext}
+import spark.ml.util.MLUtils
+
object RidgeRegressionGenerator {
@@ -38,10 +39,9 @@ object RidgeRegressionGenerator {
val X = DoubleMatrix.rand(examplesInPartition, nfeatures)
val y = X.mmul(w)
- val rnd = new NormalDistribution(0, eps)
- rnd.reseedRandomGenerator(42 + p)
+ val rnd = new Random(42 + p)
- val normalValues = Array.fill[Double](examplesInPartition)(rnd.sample())
+ val normalValues = Array.fill[Double](examplesInPartition)(rnd.nextGaussian() * eps)
val yObs = new DoubleMatrix(normalValues).addi(y)
Iterator.tabulate(examplesInPartition) { i =>
diff --git a/ml/src/test/scala/spark/ml/regression/LogisticRegressionSuite.scala b/ml/src/test/scala/spark/ml/regression/LogisticRegressionSuite.scala
index ce388ecc26..53d9789979 100644
--- a/ml/src/test/scala/spark/ml/regression/LogisticRegressionSuite.scala
+++ b/ml/src/test/scala/spark/ml/regression/LogisticRegressionSuite.scala
@@ -1,22 +1,23 @@
package spark.ml.regression
+import scala.util.Random
+
+import org.scalatest.FunSuite
+
import spark.SparkContext
import spark.SparkContext._
import spark.Logging
-import org.apache.commons.math3.distribution.NormalDistribution
-import org.scalatest.FunSuite
class LogisticRegressionSuite extends FunSuite with Logging {
// Test if we can correctly learn A, B where Y = logistic(A + B*X)
test("logistic regression") {
val nPoints = 10000
- val rnd = new NormalDistribution(0, 1)
- rnd.reseedRandomGenerator(42)
+ val rnd = new Random(42)
val sc = new SparkContext("local", "test")
- val x1 = Array.fill[Double](nPoints)(rnd.sample())
+ val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian())
val A = 2.0
val B = -1.5
diff --git a/ml/src/test/scala/spark/ml/regression/RidgeRegressionSuite.scala b/ml/src/test/scala/spark/ml/regression/RidgeRegressionSuite.scala
index 6d5f13d6f5..795cda1379 100644
--- a/ml/src/test/scala/spark/ml/regression/RidgeRegressionSuite.scala
+++ b/ml/src/test/scala/spark/ml/regression/RidgeRegressionSuite.scala
@@ -1,25 +1,25 @@
package spark.ml.regression
+import scala.util.Random
+
+import org.scalatest.FunSuite
+
import spark.SparkContext
import spark.SparkContext._
-import org.apache.commons.math3.distribution.NormalDistribution
-import org.scalatest.FunSuite
class RidgeRegressionSuite extends FunSuite {
// Test if we can correctly learn Y = 3 + X1 + X2 when
// X1 and X2 are collinear.
test("multi-collinear variables") {
- val rnd = new NormalDistribution(0, 1)
- rnd.reseedRandomGenerator(43)
+ val rnd = new Random(43)
val sc = new SparkContext("local", "test")
- val x1 = Array.fill[Double](20)(rnd.sample())
+ val x1 = Array.fill[Double](20)(rnd.nextGaussian())
// Pick a mean close to mean of x1
- val rnd1 = new NormalDistribution(0.1, 0.01)
- rnd1.reseedRandomGenerator(42)
- val x2 = Array.fill[Double](20)(rnd1.sample())
+ val rnd1 = new Random(42) //new NormalDistribution(0.1, 0.01)
+ val x2 = Array.fill[Double](20)(0.1 + rnd1.nextGaussian() * 0.01)
val xMat = (0 until 20).map(i => Array(x1(i), x2(i))).toArray
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index aa877ad4a7..5dbb5d4a65 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -224,8 +224,7 @@ object SparkBuild extends Build {
def mlSettings = sharedSettings ++ Seq(
name := "spark-ml",
libraryDependencies ++= Seq(
- "org.jblas" % "jblas" % "1.2.3",
- "org.apache.commons" % "commons-math3" % "3.2"
+ "org.jblas" % "jblas" % "1.2.3"
)
)