From e9bd6cb51dce9222a5a284cd171b299b0169852b Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sat, 4 Jan 2014 12:33:22 -0800
Subject: new example file

---
 .../scala/org/apache/spark/examples/SparkSVD.scala | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
new file mode 100644
index 0000000000..5590ee728a
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.linalg.SVD
+import org.apache.spark.mllib.linalg.MatrixEntry
+
+/**
+ * Compute SVD of an example matrix
+ * Input file should be comma-separated, 1-indexed, of the form
+ *        i,j,value
+ * where i is the row, j the column, and value is the matrix entry
+ *
+ * For example input file, see:
+ * mllib/data/als/test.data
+ */
+object SparkSVD {
+  def main(args: Array[String]) {
+    if (args.length < 3) {
+      System.err.println("Usage: SVD <master> <file>")
+      System.exit(1)
+    }
+    val sc = new SparkContext(args(0), "SVD",
+      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+
+    // Load and parse the data file
+    val data = sc.textFile(args(1)).map { line =>
+      val parts = line.split(',')
+      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
+    }
+    val m = 4
+    val n = 4
+
+    // recover largest singular vector
+    val decomposed = SVD.sparseSVD(data, m, n, 1)
+    val u = decomposed.U
+    val s = decomposed.S
+    val v = decomposed.V
+
+    println("singular values = " + s.toArray.mkString)
+  }
+}
-- cgit v1.2.3
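The example above consumes 1-indexed `i,j,value` triples, one per line. A minimal, self-contained sketch of that format and of the parsing step (plain Scala, no Spark; the object name and sample data are illustrative, not taken from the commits):

```scala
// Parse "i,j,value" lines (1-indexed) into (row, col, value) triples,
// the same shape the example feeds into MatrixEntry.
object ParseSketch {
  def main(args: Array[String]): Unit = {
    val lines = Seq("1,1,1.0", "1,2,2.0", "2,1,3.0", "2,2,4.0") // a 2 x 2 matrix
    val entries = lines.map { line =>
      val Array(i, j, v) = line.split(',')
      (i.toInt, j.toInt, v.toDouble)
    }
    entries.foreach { case (i, j, v) => println(s"($i, $j) -> $v") }
  }
}
```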
From 06c0f7628a213a08ef5adeab903160b806680acf Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sat, 4 Jan 2014 14:28:07 -0800
Subject: use SparseMatrix everywhere

---
 .../scala/org/apache/spark/examples/SparkSVD.scala |  9 +--
 .../scala/org/apache/spark/mllib/linalg/SVD.scala  | 67 +++++++---------------
 .../spark/mllib/linalg/SVDecomposedMatrix.scala    |  8 +--
 .../apache/spark/mllib/linalg/SparseMatrix.scala   | 30 ++++++++++
 .../org/apache/spark/mllib/linalg/SVDSuite.scala   | 50 +++++++++-------
 5 files changed, 89 insertions(+), 75 deletions(-)
 create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
index 5590ee728a..4b9e674c68 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -20,6 +20,7 @@ package org.apache.spark.examples
 import org.apache.spark.SparkContext
 import org.apache.spark.mllib.linalg.SVD
 import org.apache.spark.mllib.linalg.MatrixEntry
+import org.apache.spark.mllib.linalg.SparseMatrix
 
 /**
  * Compute SVD of an example matrix
@@ -48,10 +49,10 @@ object SparkSVD {
     val n = 4
 
     // recover largest singular vector
-    val decomposed = SVD.sparseSVD(data, m, n, 1)
-    val u = decomposed.U
-    val s = decomposed.S
-    val v = decomposed.V
+    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
+    val u = decomposed.U.data
+    val s = decomposed.S.data
+    val v = decomposed.V.data
 
     println("singular values = " + s.toArray.mkString)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index 31990b0223..a8efdc787e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -26,11 +26,8 @@ import org.jblas.{DoubleMatrix, Singular, MatrixFunctions}
 
 /**
  * Class used to obtain singular value decompositions
- * @param data Matrix in sparse matrix format
- * @param m number of rows
- * @param n number of columns
  */
-class SVD(var data: RDD[MatrixEntry], var m: Int, var n: Int) {
+class SVD {
   private var k: Int = 1
 
   /**
@@ -41,35 +38,11 @@ class SVD(var data: RDD[MatrixEntry], var m: Int, var n: Int) {
     this
   }
 
-  /**
-   * Set matrix to be used for SVD
-   */
-  def setDatadata(data: RDD[MatrixEntry]): this.type = {
-    this.data = data
-    this
-  }
-
-  /**
-   * Set dimensions of matrix: rows
-   */
-  def setNumRows(m: Int): this.type = {
-    this.m = m
-    this
-  }
-
-  /**
-   * Set dimensions of matrix: columns
-   */
-  def setNumCols(n: Int): this.type = {
-    this.n = n
-    this
-  }
-
   /**
    * Compute SVD using the current set parameters
    */
-  def computeSVD() : SVDecomposedMatrix = {
-    SVD.sparseSVD(data, m, n, k)
+  def computeSVD(matrix: SparseMatrix) : SVDecomposedMatrix = {
+    SVD.sparseSVD(matrix, k)
   }
 }
 
@@ -103,19 +76,19 @@ object SVD {
  * All input and output is expected in sparse matrix format, 1-indexed
  * as tuples of the form ((i,j),value) all in RDDs
  *
- * @param data RDD Matrix in sparse 1-index format ((int, int), value)
- * @param m number of rows
- * @param n number of columns
+ * @param matrix sparse matrix to factorize
  * @param k Recover k singular values and vectors
  * @return Three sparse matrices: U, S, V such that A = USV^T
  */
   def sparseSVD(
-      data: RDD[MatrixEntry],
-      m: Int,
-      n: Int,
+      matrix: SparseMatrix,
       k: Int)
     : SVDecomposedMatrix =
   {
+    val data = matrix.data
+    val m = matrix.m
+    val n = matrix.n
+
     if (m < n || m <= 0 || n <= 0) {
       throw new IllegalArgumentException("Expecting a tall and skinny matrix")
     }
@@ -153,13 +126,16 @@ object SVD {
     val sc = data.sparkContext
 
     // prepare V for returning
-    val retV = sc.makeRDD(
+    val retVdata = sc.makeRDD(
             Array.tabulate(V.rows, sigma.length){ (i,j) =>
                     MatrixEntry(i + 1, j + 1, V.get(i,j)) }.flatten)
-
-    val retS = sc.makeRDD(Array.tabulate(sigma.length){
+    val retV = SparseMatrix(retVdata, V.rows, sigma.length)
+
+    val retSdata = sc.makeRDD(Array.tabulate(sigma.length){
                     x => MatrixEntry(x + 1, x + 1, sigma(x))})
+    val retS = SparseMatrix(retSdata, sigma.length, sigma.length)
 
     // Compute U as U = A V S^-1
     // turn V S^-1 into an RDD as a sparse matrix
     val vsirdd = sc.makeRDD(Array.tabulate(V.rows, sigma.length)
                     { (i,j) => ((i + 1, j + 1), V.get(i,j) / sigma(j)) }.flatten)
 
     // Multiply A by VS^-1
     val aCols = data.map(entry => (entry.j, (entry.i, entry.mval)))
     val bRows = vsirdd.map(entry => (entry._1._1, (entry._1._2, entry._2)))
-    val retU = aCols.join(bRows).map( {case (key, ( (rowInd, rowVal), (colInd, colVal)) )
+    val retUdata = aCols.join(bRows).map( {case (key, ( (rowInd, rowVal), (colInd, colVal)) )
       => ((rowInd, colInd), rowVal*colVal)}).reduceByKey(_+_)
       .map{ case ((row, col), mval) => MatrixEntry(row, col, mval)}
-
+    val retU = SparseMatrix(retUdata, m, sigma.length)
+
     SVDecomposedMatrix(retU, retS, retV)
   }
 
@@ -195,10 +172,10 @@ object SVD {
       MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
     }
 
-    val decomposed = SVD.sparseSVD(data, m, n, k)
-    val u = decomposed.U
-    val s = decomposed.S
-    val v = decomposed.V
+    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), k)
+    val u = decomposed.U.data
+    val s = decomposed.S.data
+    val v = decomposed.V.data
 
     println("Computed " + s.toArray.length + " singular values and vectors")
     u.saveAsTextFile(output_u)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
index e0bcdab2d2..622003576d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.mllib.linalg
 
-import org.apache.spark.rdd.RDD
-
 /**
  * Class that represents the SV decomposition of a matrix
  *
@@ -26,7 +24,7 @@
  * @param U such that A = USV^T
  * @param S such that A = USV^T
  * @param V such that A = USV^T
  */
-case class SVDecomposedMatrix(val U: RDD[MatrixEntry],
-                              val S: RDD[MatrixEntry],
-                              val V: RDD[MatrixEntry])
+case class SVDecomposedMatrix(val U: SparseMatrix,
+                              val S: SparseMatrix,
+                              val V: SparseMatrix)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala
new file mode 100644
index 0000000000..cbd1a2a5a4
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.linalg
+
+import org.apache.spark.rdd.RDD
+
+
+/**
+ * Class that represents a sparse matrix
+ *
+ * @param data RDD of nonzero entries
+ * @param m number of rows
+ * @param n number of columns
+ */
+case class SparseMatrix(val data: RDD[MatrixEntry], val m: Int, val n: Int)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
index 4126e819e3..f239e8505f 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
@@ -45,9 +45,12 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
   val EPSILON = 1e-4
 
   // Return jblas matrix from sparse matrix RDD
-  def getDenseMatrix(matrix:RDD[MatrixEntry], m:Int, n:Int) : DoubleMatrix = {
+  def getDenseMatrix(matrix:SparseMatrix) : DoubleMatrix = {
+    val data = matrix.data
+    val m = matrix.m
+    val n = matrix.n
     val ret = DoubleMatrix.zeros(m, n)
-    matrix.toArray.map(x => ret.put(x.i - 1, x.j - 1, x.mval))
+    matrix.data.toArray.map(x => ret.put(x.i - 1, x.j - 1, x.mval))
     ret
   }
 
@@ -67,24 +70,26 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
       MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten )
 
-    val decomposed = SVD.sparseSVD(data, m, n, n)
+    val a = SparseMatrix(data, m, n)
+
+    val decomposed = SVD.sparseSVD(a, n)
     val u = decomposed.U
     val v = decomposed.V
-    val s = decomposed.S 
+    val s = decomposed.S
 
-    val densea = getDenseMatrix(data, m, n)
+    val densea = getDenseMatrix(a)
     val svd = Singular.sparseSVD(densea)
 
-    val retu = getDenseMatrix(u, m, n)
-    val rets = getDenseMatrix(s, n, n)
-    val retv = getDenseMatrix(v, n, n)
+    val retu = getDenseMatrix(u)
+    val rets = getDenseMatrix(s)
+    val retv = getDenseMatrix(v)
 
     // check individual decomposition
     assertMatrixEquals(retu, svd(0))
     assertMatrixEquals(rets, DoubleMatrix.diag(svd(1)))
     assertMatrixEquals(retv, svd(2))
 
-    // check multiplication guarantee 
+    // check multiplication guarantee
     assertMatrixEquals(retu.mmul(rets).mmul(retv.transpose), densea)
   }
 
@@ -95,20 +100,22 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
       MatrixEntry(a + 1, b + 1, 1.0) }.flatten )
     val k = 1
 
-    val decomposed = SVD.sparseSVD(data, m, n, k)
+    val a = SparseMatrix(data, m, n)
+
+    val decomposed = SVD.sparseSVD(a, k)
     val u = decomposed.U
     val s = decomposed.S
    val v = decomposed.V
-    val retrank = s.toArray.length
+    val retrank = s.data.toArray.length
 
     assert(retrank == 1, "rank returned not one")
 
-    val densea = getDenseMatrix(data, m, n)
+    val densea = getDenseMatrix(a)
    val svd = Singular.sparseSVD(densea)
 
-    val retu = getDenseMatrix(u, m, retrank)
-    val rets = getDenseMatrix(s, retrank, retrank)
-    val retv = getDenseMatrix(v, n, retrank)
+    val retu = getDenseMatrix(u)
+    val rets = getDenseMatrix(s)
+    val retv = getDenseMatrix(v)
 
     // check individual decomposition
     assertMatrixEquals(retu, svd(0).getColumn(0))
 
@@ -124,21 +131,22 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
       MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten )
+    val a = SparseMatrix(data, m, n)
 
     val k = 1 // only one svalue above this
 
-    val decomposed = SVD.sparseSVD(data, m, n, k)
+    val decomposed = SVD.sparseSVD(a, k)
     val u = decomposed.U
     val s = decomposed.S
     val v = decomposed.V
-    val retrank = s.toArray.length
+    val retrank = s.data.toArray.length
 
-    val densea = getDenseMatrix(data, m, n)
+    val densea = getDenseMatrix(a)
     val svd = Singular.sparseSVD(densea)
 
-    val retu = getDenseMatrix(u, m, retrank)
-    val rets = getDenseMatrix(s, retrank, retrank)
-    val retv = getDenseMatrix(v, n, retrank)
+    val retu = getDenseMatrix(u)
+    val rets = getDenseMatrix(s)
+    val retv = getDenseMatrix(v)
 
     assert(retrank == 1, "rank returned not one")
 
-- cgit v1.2.3
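The sparseSVD reworked in this commit follows the tall-and-skinny recipe: take the SVD of the small n x n Gram matrix A^T A = V S^2 V^T to get V and the singular values, then recover U = A V S^-1. A local, self-contained sketch of the same recipe in plain jblas, the library the implementation and its test already use (the object and variable names here are illustrative, not part of MLlib):

```scala
import org.jblas.{DoubleMatrix, Singular}

object TallSkinnySVDSketch {
  def main(args: Array[String]): Unit = {
    // A 3 x 2 matrix: m >= n, the tall-and-skinny shape sparseSVD requires
    val a = new DoubleMatrix(Array(
      Array(1.0, 2.0),
      Array(3.0, 4.0),
      Array(5.0, 6.0)))
    // The n x n Gram matrix is small enough to factor on one machine
    val gram = a.transpose.mmul(a)
    // gram = V * diag(s^2) * V^T, so its SVD yields V and the squared singular values
    val gramSVD = Singular.fullSVD(gram)
    val v = gramSVD(0)
    val sigma = gramSVD(1).toArray.map(math.sqrt)
    // Recover U = A * V * S^-1 (assumes the kept singular values are nonzero)
    val sInv = DoubleMatrix.diag(new DoubleMatrix(sigma.map(1.0 / _)))
    val u = a.mmul(v).mmul(sInv)
    // Sanity check, mirroring the "multiplication guarantee" test: U S V^T == A
    val rebuilt = u.mmul(DoubleMatrix.diag(new DoubleMatrix(sigma))).mmul(v.transpose)
    println("max reconstruction error = " + rebuilt.sub(a).normmax())
  }
}
```

The distributed code does exactly this, except that A, U, S, and V live in RDDs and the final multiplication U = A V S^-1 is expressed as the join-and-reduce over aCols and bRows shown in the diff.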
From cf5bd4ab2e9db72d3d9164053523e9e872d85b94 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Thu, 9 Jan 2014 22:39:41 -0800
Subject: fix example

---
 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
index 4b9e674c68..d9c672f140 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -33,8 +33,8 @@ import org.apache.spark.mllib.linalg.SparseMatrix
  */
 object SparkSVD {
   def main(args: Array[String]) {
-    if (args.length < 3) {
-      System.err.println("Usage: SVD <master> <file>")
+    if (args.length != 2) {
+      System.err.println("Usage: SparkSVD <master> <file>")
       System.exit(1)
     }
     val sc = new SparkContext(args(0), "SVD",
-- cgit v1.2.3

From 1afdeaeb2f436084a6fbe8d73690f148f7b462c4 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 10 Jan 2014 21:30:54 -0800
Subject: add dimension parameters to example

---
 .../src/main/scala/org/apache/spark/examples/SparkSVD.scala | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
index d9c672f140..ce7c1c48b5 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -29,12 +29,12 @@ import org.apache.spark.mllib.linalg.SparseMatrix
  * where i is the row, j the column, and value is the matrix entry
  *
  * For example input file, see:
- * mllib/data/als/test.data
+ * mllib/data/als/test.data (example is 4 x 4)
  */
 object SparkSVD {
   def main(args: Array[String]) {
-    if (args.length != 2) {
-      System.err.println("Usage: SparkSVD <master> <file>")
+    if (args.length != 4) {
+      System.err.println("Usage: SparkSVD <master> <file> m n")
       System.exit(1)
     }
     val sc = new SparkContext(args(0), "SVD",
@@ -45,8 +45,8 @@ object SparkSVD {
       val parts = line.split(',')
       MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
     }
-    val m = 4
-    val n = 4
+    val m = args(2).toInt
+    val n = args(3).toInt
 
     // recover largest singular vector
     val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
-- cgit v1.2.3
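This commit passes m and n explicitly rather than inferring them. The dimensions could instead be derived from the entries at the cost of an extra pass over the RDD; a hypothetical sketch of that alternative (the helper below is not part of MLlib), which also shows why explicit parameters are the cheaper choice:

```scala
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.MatrixEntry

object DimensionSketch {
  // Hypothetical helper: infer (m, n) from 1-indexed entries in one extra
  // pass over the data. Taking m and n as command-line arguments, as the
  // commit does, avoids this pass entirely.
  def inferDimensions(data: RDD[MatrixEntry]): (Int, Int) =
    data.map(e => (e.i, e.j))
        .reduce((a, b) => (math.max(a._1, b._1), math.max(a._2, b._2)))
}
```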
From d28bf4182758f08862d5838c918756801a9d7327 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 13:39:40 -0800
Subject: changes from PR

---
 docs/mllib-guide.md                                |  5 +-
 .../scala/org/apache/spark/examples/SparkSVD.scala | 59 ----------------------
 .../org/apache/spark/examples/mllib/SparkSVD.scala | 59 ++++++++++++++++++++++
 3 files changed, 62 insertions(+), 61 deletions(-)
 delete mode 100644 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
 create mode 100644 examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index a140ecb618..26350ce106 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -445,11 +445,12 @@ Given
 an *m x n* matrix *A*, we can compute matrices *U, S, V* such that
 
 *A = U * S * V^T*
 
-There is no restriction on m, but we require n^2 doubles to
-fit in memory.
+There is no restriction on m, but we require n^2 doubles to
+fit in memory locally on one machine.
+Further, n should be less than m.
 
 The decomposition is computed by first computing *A^TA = V S^2 V^T*,
-computing svd locally on that (since n x n is small),
+computing SVD locally on that (since n x n is small),
 from which we recover S and V.
 Then we compute U via easy matrix multiplication as *U = A * V * S^-1*
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
deleted file mode 100644
index ce7c1c48b5..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples
-
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.linalg.SVD
-import org.apache.spark.mllib.linalg.MatrixEntry
-import org.apache.spark.mllib.linalg.SparseMatrix
-
-/**
- * Compute SVD of an example matrix
- * Input file should be comma-separated, 1-indexed, of the form
- *        i,j,value
- * where i is the row, j the column, and value is the matrix entry
- *
- * For example input file, see:
- * mllib/data/als/test.data (example is 4 x 4)
- */
-object SparkSVD {
-  def main(args: Array[String]) {
-    if (args.length != 4) {
-      System.err.println("Usage: SparkSVD <master> <file> m n")
-      System.exit(1)
-    }
-    val sc = new SparkContext(args(0), "SVD",
-      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
-
-    // Load and parse the data file
-    val data = sc.textFile(args(1)).map { line =>
-      val parts = line.split(',')
-      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
-    }
-    val m = args(2).toInt
-    val n = args(3).toInt
-
-    // recover largest singular vector
-    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
-    val u = decomposed.U.data
-    val s = decomposed.S.data
-    val v = decomposed.V.data
-
-    println("singular values = " + s.toArray.mkString)
-  }
-}
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
new file mode 100644
index 0000000000..50e5f5bd87
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.linalg.SVD
+import org.apache.spark.mllib.linalg.MatrixEntry
+import org.apache.spark.mllib.linalg.SparseMatrix
+
+/**
+ * Compute SVD of an example matrix
+ * Input file should be comma-separated, 1-indexed, of the form
+ *        i,j,value
+ * where i is the row, j the column, and value is the matrix entry
+ *
+ * For example input file, see:
+ * mllib/data/als/test.data (example is 4 x 4)
+ */
+object SparkSVD {
+  def main(args: Array[String]) {
+    if (args.length != 4) {
+      System.err.println("Usage: SparkSVD <master> <file> m n")
+      System.exit(1)
+    }
+    val sc = new SparkContext(args(0), "SVD",
+      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+
+    // Load and parse the data file
+    val data = sc.textFile(args(1)).map { line =>
+      val parts = line.split(',')
+      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
+    }
+    val m = args(2).toInt
+    val n = args(3).toInt
+
+    // recover largest singular vector
+    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
+    val u = decomposed.U.data
+    val s = decomposed.S.data
+    val v = decomposed.V.data
+
+    println("singular values = " + s.toArray.mkString)
+  }
+}
-- cgit v1.2.3

From 4e96757793e7aee165381f80a60b3f46f60c9ebc Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 14:33:03 -0800
Subject: make example 0-indexed

---
 examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
index 50e5f5bd87..19676fcc1a 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
@@ -43,7 +43,7 @@ object SparkSVD {
     // Load and parse the data file
     val data = sc.textFile(args(1)).map { line =>
       val parts = line.split(',')
-      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
+      MatrixEntry(parts(0).toInt - 1, parts(1).toInt - 1, parts(2).toDouble)
     }
     val m = args(2).toInt
     val n = args(3).toInt
-- cgit v1.2.3
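After this last commit the input file stays 1-indexed, but the entries handed to MLlib become 0-indexed. A rough end-to-end sketch of driving the final API directly, without an input file (a local master is assumed, as is that the rest of MLlib's SVD path accepts 0-indexed entries after this change; only the example's side of that move appears in this series, and the object name below is illustrative):

```scala
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{MatrixEntry, SparseMatrix, SVD}

object SVDUsageSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "SVDUsageSketch")
    // 0-indexed entries of a 3 x 2 matrix, matching the example's new convention
    val entries = sc.makeRDD(Seq(
      MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 2.0),
      MatrixEntry(1, 0, 3.0), MatrixEntry(1, 1, 4.0),
      MatrixEntry(2, 0, 5.0), MatrixEntry(2, 1, 6.0)))
    // recover the single largest singular value and its vectors
    val decomposed = SVD.sparseSVD(SparseMatrix(entries, 3, 2), 1)
    println("singular values = " + decomposed.S.data.collect().mkString(", "))
    sc.stop()
  }
}
```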