aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorPrashant Sharma <prashant.s@imaginea.com>2013-11-25 18:10:54 +0530
committerPrashant Sharma <prashant.s@imaginea.com>2013-11-25 18:10:54 +0530
commit44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd (patch)
treecd670cf460d106b5c5241d26aee4b35ccb6242aa /mllib
parent489862a65766d30278c186d280c6286937c81155 (diff)
parent62889c419cfddb1cea2d260e9b530349d9f8eeda (diff)
downloadspark-44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd.tar.gz
spark-44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd.tar.bz2
spark-44fd30d3fbcf830deecbe8ea3e8ea165e74e6edd.zip
Merge branch 'master' into scala-2.10-wip
Conflicts: core/src/main/scala/org/apache/spark/rdd/RDD.scala project/SparkBuild.scala
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala11
1 files changed, 6 insertions, 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index edbf77dbcc..0dee9399a8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -18,15 +18,16 @@
package org.apache.spark.mllib.clustering
import scala.collection.mutable.ArrayBuffer
-import scala.util.Random
+
+import org.jblas.DoubleMatrix
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.apache.spark.Logging
import org.apache.spark.mllib.util.MLUtils
+import org.apache.spark.util.XORShiftRandom
-import org.jblas.DoubleMatrix
/**
@@ -195,7 +196,7 @@ class KMeans private (
*/
private def initRandom(data: RDD[Array[Double]]): Array[ClusterCenters] = {
// Sample all the cluster centers in one pass to avoid repeated scans
- val sample = data.takeSample(true, runs * k, new Random().nextInt()).toSeq
+ val sample = data.takeSample(true, runs * k, new XORShiftRandom().nextInt()).toSeq
Array.tabulate(runs)(r => sample.slice(r * k, (r + 1) * k).toArray)
}
@@ -210,7 +211,7 @@ class KMeans private (
*/
private def initKMeansParallel(data: RDD[Array[Double]]): Array[ClusterCenters] = {
// Initialize each run's center to a random point
- val seed = new Random().nextInt()
+ val seed = new XORShiftRandom().nextInt()
val sample = data.takeSample(true, runs, seed).toSeq
val centers = Array.tabulate(runs)(r => ArrayBuffer(sample(r)))
@@ -222,7 +223,7 @@ class KMeans private (
for (r <- 0 until runs) yield (r, KMeans.pointCost(centerArrays(r), point))
}.reduceByKey(_ + _).collectAsMap()
val chosen = data.mapPartitionsWithIndex { (index, points) =>
- val rand = new Random(seed ^ (step << 16) ^ index)
+ val rand = new XORShiftRandom(seed ^ (step << 16) ^ index)
for {
p <- points
r <- 0 until runs