author     Edison Tung <edisontung@gmail.com>  2011-12-01 13:43:25 -0800
committer  Edison Tung <edisontung@gmail.com>  2011-12-01 13:43:25 -0800
commit     42f8847a217790a521b6dd63dfa3764378d49f83 (patch)
tree       c1634c02ef9ce461ea49234e464aaa10071e64a7 /examples
parent     de01b6deaaee1b43321e0aac330f4a98c0ea61c6 (diff)
Revert de01b6deaaee1b43321e0aac330f4a98c0ea61c6^..HEAD
Diffstat (limited to 'examples')
-rw-r--r--  examples/src/main/scala/spark/examples/SparkLocalKMeans.scala  73
1 file changed, 0 insertions(+), 73 deletions(-)
diff --git a/examples/src/main/scala/spark/examples/SparkLocalKMeans.scala b/examples/src/main/scala/spark/examples/SparkLocalKMeans.scala
deleted file mode 100644
index 8d9527b7c1..0000000000
--- a/examples/src/main/scala/spark/examples/SparkLocalKMeans.scala
+++ /dev/null
@@ -1,73 +0,0 @@
-package spark.examples
-
-import java.util.Random
-import Vector._
-import spark.SparkContext
-import spark.SparkContext._
-import scala.collection.mutable.HashMap
-import scala.collection.mutable.HashSet
-
-object SparkLocalKMeans {
- val R = 1000 // Scaling factor
- val rand = new Random(42)
-
- def parseVector(line: String): Vector = {
- return new Vector(line.split(' ').map(_.toDouble))
- }
-
- def closestPoint(p: Vector, centers: HashMap[Int, Vector]): Int = {
- var index = 0
- var bestIndex = 0
- var closest = Double.PositiveInfinity
-
- for (i <- 1 to centers.size) {
- val vCurr = centers.get(i).get
- val tempDist = p.squaredDist(vCurr)
- if (tempDist < closest) {
- closest = tempDist
- bestIndex = i
- }
- }
-
- return bestIndex
- }
-
- def main(args: Array[String]) {
- if (args.length < 4) {
- System.err.println("Usage: SparkLocalKMeans <master> <file> <k> <convergeDist>")
- System.exit(1)
- }
- val sc = new SparkContext(args(0), "SparkLocalKMeans")
- val lines = sc.textFile(args(1))
- val data = lines.map(parseVector _).cache()
- val K = args(2).toInt
- val convergeDist = args(3).toDouble
-
- var points = data.sample(false, (K+1)/data.count().toDouble, 42).collect
- var kPoints = new HashMap[Int, Vector]
- var tempDist = 1.0
-
- for (i <- 1 to points.size) {
- kPoints.put(i, points(i-1))
- }
-
- while(tempDist > convergeDist) {
- var closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))
-
- var pointStats = closest.reduceByKey {case ((x1, y1), (x2, y2)) => (x1 + x2, y1+y2)}
-
- var newPoints = pointStats.map {mapping => (mapping._1, mapping._2._1/mapping._2._2)}.collect()
-
- tempDist = 0.0
- for (mapping <- newPoints) {
- tempDist += kPoints.get(mapping._1).get.squaredDist(mapping._2)
- }
-
- for (newP <- newPoints) {
- kPoints.put(newP._1, newP._2)
- }
- }
-
- println("Final centers: " + kPoints)
- }
-}
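
The deleted SparkLocalKMeans.scala implemented Lloyd's k-means: assign each point to its nearest center (closestPoint), average each cluster via reduceByKey, and loop until the centers' total squared movement falls below convergeDist. For reference, here is a minimal, self-contained sketch of that same loop using plain Scala collections in place of the old RDD operations; it assumes nothing from the Spark API of that era, and all names here (Vec, squaredDist, kMeans) are illustrative only.

// Reference sketch, not the deleted file: the RDD steps above map to
// groupBy (assignment), per-cluster averaging (reduceByKey + map), and a
// local convergence check (collect + squaredDist).
object KMeansSketch {

  type Vec = Array[Double]

  // Squared Euclidean distance; the deleted example used the same metric
  // for both assignment and the convergence test.
  def squaredDist(a: Vec, b: Vec): Double =
    a.zip(b).map { case (x, y) => (x - y) * (x - y) }.sum

  def add(a: Vec, b: Vec): Vec      = a.zip(b).map { case (x, y) => x + y }
  def scale(a: Vec, s: Double): Vec = a.map(_ * s)

  // Index of the nearest center: the deleted closestPoint, without the
  // 1-based HashMap bookkeeping.
  def closestPoint(p: Vec, centers: IndexedSeq[Vec]): Int =
    centers.indices.minBy(i => squaredDist(p, centers(i)))

  def kMeans(data: Seq[Vec], k: Int, convergeDist: Double): IndexedSeq[Vec] = {
    // The deleted example seeded centers by sampling the input; taking the
    // first k points keeps this sketch deterministic.
    var centers = data.take(k).toIndexedSeq
    var moved   = Double.PositiveInfinity

    while (moved > convergeDist) {
      // Assignment step: group points by their closest center.
      val grouped = data.groupBy(p => closestPoint(p, centers))

      // Update step: average each cluster; an empty cluster keeps its center.
      val newCenters = centers.indices.map { i =>
        grouped.get(i)
          .map(ps => scale(ps.reduce(add), 1.0 / ps.size))
          .getOrElse(centers(i))
      }

      // Converge when the centers' total squared movement is small enough.
      moved = centers.zip(newCenters)
        .map { case (a, b) => squaredDist(a, b) }.sum
      centers = newCenters
    }
    centers
  }

  def main(args: Array[String]): Unit = {
    val rand = new java.util.Random(42)
    val data = Seq.fill(200)(Array(rand.nextGaussian(), rand.nextGaussian() + 5))
    kMeans(data, k = 2, convergeDist = 1e-4)
      .foreach(c => println(c.mkString("(", ", ", ")")))
  }
}

One design difference worth noting: the deleted code kept centers in a mutable HashMap[Int, Vector] keyed from 1, which forced the off-by-one indexing in its loops; an IndexedSeq indexed from 0, as above, removes that bookkeeping without changing the algorithm.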