diff options
author | Prashant Sharma <prashant.s@imaginea.com> | 2013-11-25 18:10:54 +0530 |
---|---|---|
committer | Prashant Sharma <prashant.s@imaginea.com> | 2013-11-25 18:10:54 +0530 |
commit | 44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd (patch) | |
tree | cd670cf460d106b5c5241d26aee4b35ccb6242aa /mllib/src | |
parent | 489862a65766d30278c186d280c6286937c81155 (diff) | |
parent | 62889c419cfddb1cea2d260e9b530349d9f8eeda (diff) | |
download | spark-44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd.tar.gz spark-44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd.tar.bz2 spark-44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd.zip |
Merge branch 'master' into scala-2.10-wip
Conflicts:
core/src/main/scala/org/apache/spark/rdd/RDD.scala
project/SparkBuild.scala
Diffstat (limited to 'mllib/src')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala | 11 |
1 files changed, 6 insertions, 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index edbf77dbcc..0dee9399a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -18,15 +18,16 @@ package org.apache.spark.mllib.clustering import scala.collection.mutable.ArrayBuffer -import scala.util.Random + +import org.jblas.DoubleMatrix import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.util.XORShiftRandom -import org.jblas.DoubleMatrix /** @@ -195,7 +196,7 @@ class KMeans private ( */ private def initRandom(data: RDD[Array[Double]]): Array[ClusterCenters] = { // Sample all the cluster centers in one pass to avoid repeated scans - val sample = data.takeSample(true, runs * k, new Random().nextInt()).toSeq + val sample = data.takeSample(true, runs * k, new XORShiftRandom().nextInt()).toSeq Array.tabulate(runs)(r => sample.slice(r * k, (r + 1) * k).toArray) } @@ -210,7 +211,7 @@ class KMeans private ( */ private def initKMeansParallel(data: RDD[Array[Double]]): Array[ClusterCenters] = { // Initialize each run's center to a random point - val seed = new Random().nextInt() + val seed = new XORShiftRandom().nextInt() val sample = data.takeSample(true, runs, seed).toSeq val centers = Array.tabulate(runs)(r => ArrayBuffer(sample(r))) @@ -222,7 +223,7 @@ class KMeans private ( for (r <- 0 until runs) yield (r, KMeans.pointCost(centerArrays(r), point)) }.reduceByKey(_ + _).collectAsMap() val chosen = data.mapPartitionsWithIndex { (index, points) => - val rand = new Random(seed ^ (step << 16) ^ index) + val rand = new XORShiftRandom(seed ^ (step << 16) ^ index) for { p <- points r <- 0 until runs |