// Provenance
// ----------
// This text was recovered from a cgit-rendered patch whose line breaks had been collapsed:
//
//   From 42f8847a217790a521b6dd63dfa3764378d49f83 Mon Sep 17 00:00:00 2001
//   From: Edison Tung
//   Date: Thu, 1 Dec 2011 13:43:25 -0800
//   Subject: Revert de01b6deaaee1b43321e0aac330f4a98c0ea61c6^..HEAD
//
//   examples/src/main/scala/spark/examples/SparkLocalKMeans.scala | 73 deletions(-)
//   delete mode 100644, index 8d9527b7c1..0000000000
//   (cgit view limited to 'examples/src/main/scala')
//
// That commit deleted this file; what follows is the deleted content, re-formatted, with the
// fixes described in the comments below. The argument list in the usage message was absent
// from the recovered text and has been reconstructed from how main() reads args(0)..args(3).
package spark.examples

import java.util.Random
import Vector._
import spark.SparkContext
import spark.SparkContext._
import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

/**
 * K-means clustering example driven through a SparkContext.
 *
 * Reads whitespace-separated numeric vectors from a text file, picks initial centers by
 * sampling the data, and then repeatedly reassigns points and recomputes centers until the
 * summed squared movement of the centers drops to `convergeDist` or below.
 */
object SparkLocalKMeans {
  // Neither R nor rand is referenced inside this object; both are kept because they are
  // public members that other code may read.
  val R = 1000 // Scaling factor
  val rand = new Random(42)

  /**
   * Parses one input line into a Vector.
   *
   * @param line coordinates separated by single spaces
   * @return a Vector holding the parsed coordinates in order
   * @throws NumberFormatException if any token is not a valid Double
   */
  def parseVector(line: String): Vector =
    new Vector(line.split(' ').map(_.toDouble))

  /**
   * Finds the key of the center nearest to `p` by squared distance.
   *
   * Centers are looked up under the keys 1..centers.size, so the map must be keyed that way.
   * On ties the lowest key wins because only a strictly smaller distance replaces the best.
   *
   * @param p       the point to classify
   * @param centers current centers, keyed 1..centers.size
   * @return the key of the nearest center, or 0 when `centers` is empty
   * @throws NoSuchElementException if a key in 1..centers.size is missing from `centers`
   */
  def closestPoint(p: Vector, centers: HashMap[Int, Vector]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val dist = p.squaredDist(centers(i))
      if (dist < closest) {
        closest = dist
        bestIndex = i
      }
    }

    bestIndex
  }

  /**
   * Entry point.
   *
   * @param args args(0) is passed to SparkContext as its first argument, args(1) is the input
   *             text file, args(2) is the requested number of clusters K, args(3) is the
   *             convergence threshold
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 4) {
      System.err.println("Usage: SparkLocalKMeans <master> <file> <k> <convergeDist>")
      System.exit(1)
    }
    val sc = new SparkContext(args(0), "SparkLocalKMeans")
    val lines = sc.textFile(args(1))
    val data = lines.map(parseVector _).cache()
    val K = args(2).toInt
    val convergeDist = args(3).toDouble

    // sample() selects by fraction, so the number of points it returns is not fixed: it can
    // exceed K or be zero. Cap at K so we never build more clusters than requested. Fewer
    // than K seeds is tolerated and simply yields fewer clusters.
    val seeds = data.sample(false, (K + 1) / data.count().toDouble, 42).collect.take(K)
    if (seeds.isEmpty) {
      // With no centers every point would map to index 0, which is not a key of kPoints,
      // and the first iteration would die with an unhelpful NoSuchElementException.
      System.err.println("SparkLocalKMeans: sampling produced no initial centers")
      System.exit(1)
    }

    // Centers are keyed 1..n to match the lookup convention in closestPoint.
    val kPoints = new HashMap[Int, Vector]
    for (i <- 1 to seeds.size) {
      kPoints.put(i, seeds(i - 1))
    }

    // Start above any sensible threshold so the loop body runs at least once.
    var tempDist = 1.0
    while (tempDist > convergeDist) {
      // (nearest center key, (point, 1)) so sums and counts can be reduced together.
      val closest = data.map(p => (closestPoint(p, kPoints), (p, 1)))

      val pointStats = closest.reduceByKey { case ((x1, y1), (x2, y2)) => (x1 + x2, y1 + y2) }

      // New center = sum of assigned points / number of assigned points.
      val newPoints = pointStats.map { stat => (stat._1, stat._2._1 / stat._2._2) }.collect()

      // Each key occurs once after reduceByKey, so measuring the move and storing the new
      // center in a single pass is equivalent to doing them in two separate loops. Centers
      // that received no points are absent from newPoints and keep their old position.
      tempDist = 0.0
      for ((id, center) <- newPoints) {
        tempDist += kPoints(id).squaredDist(center)
        kPoints.put(id, center)
      }
    }

    println("Final centers: " + kPoints)
  }
}