about summary refs log tree commit diff
path: root/examples/src/main/scala/SparkHdfsLR.scala
diff options
context:
space:
mode:
author: Matei Zaharia <matei@eecs.berkeley.edu> 2011-02-01 15:11:08 -0800
committer: Matei Zaharia <matei@eecs.berkeley.edu> 2011-02-01 15:11:08 -0800
commit: e5c4cd8a5e188592f8786a265c0cd073c69ac886 (patch)
tree: 031c4f814372a818f7574cfac84f1fc4749bd9d3 /examples/src/main/scala/SparkHdfsLR.scala
parent: dcfa2ce83bf884008b4e8c02e923da1e49027ba4 (diff)
downloadspark-e5c4cd8a5e188592f8786a265c0cd073c69ac886.tar.gz
spark-e5c4cd8a5e188592f8786a265c0cd073c69ac886.tar.bz2
spark-e5c4cd8a5e188592f8786a265c0cd073c69ac886.zip
Made examples and core subprojects
Diffstat (limited to 'examples/src/main/scala/SparkHdfsLR.scala')
-rw-r--r--examples/src/main/scala/SparkHdfsLR.scala51
1 file changed, 51 insertions, 0 deletions
diff --git a/examples/src/main/scala/SparkHdfsLR.scala b/examples/src/main/scala/SparkHdfsLR.scala
new file mode 100644
index 0000000000..f14d48b17c
--- /dev/null
+++ b/examples/src/main/scala/SparkHdfsLR.scala
@@ -0,0 +1,51 @@
+import java.util.Random
+import scala.math.exp
+import Vector._
+import spark._
+
/**
 * Logistic regression via batch gradient descent over a text file in HDFS.
 * Each input line is "label f1 f2 ... fD" (whitespace-separated); labels are
 * expected to be +1.0 / -1.0.
 */
object SparkHdfsLR {
  val D = 10 // Number of dimensions (fixed feature count per point)
  // Fixed seed so the random initial weight vector is reproducible.
  val rand = new Random(42)

  /** A labeled example: feature vector `x` with label `y`. */
  case class DataPoint(x: Vector, y: Double)

  /**
   * Parses one input line into a DataPoint. The first token is the label,
   * the next D tokens are the features. StringTokenizer is used instead of
   * String.split to avoid regex overhead on large HDFS inputs.
   */
  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    val y = tok.nextToken.toDouble
    // Array.fill evaluates its element expression D times in order,
    // consuming the next D tokens left to right.
    val x = Array.fill(D)(tok.nextToken.toDouble)
    DataPoint(new Vector(x), y)
  }

  def main(args: Array[String]) {
    if (args.length < 3) {
      System.err.println("Usage: SparkHdfsLR <master> <file> <iters>")
      System.exit(1)
    }
    val sc = new SparkContext(args(0), "SparkHdfsLR")
    val lines = sc.textFile(args(1))
    // Cache the parsed points: they are re-scanned on every iteration.
    val points = lines.map(parsePoint _).cache()
    val ITERATIONS = args(2).toInt

    // Initialize each component of w uniformly in [-1, 1).
    var w = Vector(D, _ => 2 * rand.nextDouble - 1)
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      // Accumulator sums per-point gradient contributions across the cluster.
      val gradient = sc.accumulator(Vector.zeros(D))
      for (p <- points) {
        // Gradient of the logistic loss for point p at the current w.
        val scale = (1 / (1 + exp(-p.y * (w dot p.x))) - 1) * p.y
        gradient += scale * p.x
      }
      w -= gradient.value
    }

    println("Final w: " + w)
  }
}