From dd0d3f008b5dd478fdfb6d20c53713ca0c7c2be1 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 19:53:04 -0800
Subject: New documentation

---
 docs/mllib-guide.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index c1ff9c417c..8c86369ae6 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -210,3 +210,55 @@ at each iteration.
 Available algorithms for gradient descent:
 
 * [GradientDescent](api/mllib/index.html#org.apache.spark.mllib.optimization.GradientDescent)
+
+
+
+# Singular Value Decomposition
+Singular Value Decomposition for Tall and Skinny matrices.
+Given an m x n matrix A, this will compute matrices U, S, V such that
+A = U * S * V^T
+
+There is no restriction on m, but we require n^2 doubles to fit in memory.
+Further, n should be less than m.
+
+The decomposition is computed by first computing A^TA = V S^2 V^T,
+computing svd locally on that (since n x n is small),
+from which we recover S and V.
+Then we compute U via easy matrix multiplication
+as U = A * V * S^-1
+
+Only singular vectors associated with singular values
+greater or equal to MIN_SVALUE are recovered. If there are k
+such values, then the dimensions of the return will be:
+
+S is k x k and diagonal, holding the singular values on diagonal
+U is m x k and satisfies U^T*U = eye(k)
+V is n x k and satisfies V^TV = eye(k)
+
+All input and output is expected in sparse matrix format, 1-indexed
+as tuples of the form ((i,j),value) all in RDDs
+
+{% highlight scala %}
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.linalg.SVD
+
+// Load and parse the data file
+val data = sc.textFile("mllib/data/als/test.data").map { line =>
+  val parts = line.split(',')
+  ((parts(0).toInt, parts(1).toInt), parts(2).toDouble)
+}
+val m = 4
+val n = 4
+
+// recover singular vectors for singular values at or above 1e-5
+val (u, s, v) = SVD.sparseSVD(data, m, n, 1e-5)
+
+println("singular values = " + s.toArray.mkString)
+
+{% endhighlight %}
+
+
+
+
+
--
cgit v1.2.3


From b941b6f7b0131b4382b09740d56916574901fd55 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 20:01:13 -0800
Subject: doc tweaks

---
 docs/mllib-guide.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 8c86369ae6..08d6d74853 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -231,12 +231,12 @@ Only singular vectors associated with singular values
 greater or equal to MIN_SVALUE are recovered. If there are k
 such values, then the dimensions of the return will be:
 
-S is k x k and diagonal, holding the singular values on diagonal
-U is m x k and satisfies U^T*U = eye(k)
-V is n x k and satisfies V^TV = eye(k)
+* *S* is *k x k* and diagonal, holding the singular values on diagonal.
+* *U* is *m x k* and satisfies U^T*U = eye(k).
+* *V* is *n x k* and satisfies V^TV = eye(k).
 
 All input and output is expected in sparse matrix format, 1-indexed
-as tuples of the form ((i,j),value) all in RDDs
+as tuples of the form ((i,j),value) all in RDDs. Below is example usage.
 {% highlight scala %}
 
--
cgit v1.2.3


From 97dc527849b836703811acdbd6767685585099df Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 20:02:37 -0800
Subject: doc tweak

---
 docs/mllib-guide.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 08d6d74853..8c490eba69 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -215,17 +215,18 @@ Available algorithms for gradient descent:
 
 # Singular Value Decomposition
 Singular Value Decomposition for Tall and Skinny matrices.
-Given an m x n matrix A, this will compute matrices U, S, V such that
-A = U * S * V^T
+Given an *m x n* matrix *A*, this will compute matrices *U, S, V* such that
+
+*A = U * S * V^T*
 
 There is no restriction on m, but we require n^2 doubles to fit in memory.
 Further, n should be less than m.
 
-The decomposition is computed by first computing A^TA = V S^2 V^T,
+The decomposition is computed by first computing *A^TA = V S^2 V^T*,
 computing svd locally on that (since n x n is small),
 from which we recover S and V.
 Then we compute U via easy matrix multiplication
-as U = A * V * S^-1
+as *U = A * V * S^-1*
 
 Only singular vectors associated with singular values
 greater or equal to MIN_SVALUE are recovered. If there are k
--
cgit v1.2.3


From 53ccf65362d935f89fb9e27b4a3485454fa4c882 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 20:03:47 -0800
Subject: doc tweaks

---
 docs/mllib-guide.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 8c490eba69..711187fbea 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -215,7 +215,7 @@ Available algorithms for gradient descent:
 
 # Singular Value Decomposition
 Singular Value Decomposition for Tall and Skinny matrices.
-Given an *m x n* matrix *A*, this will compute matrices *U, S, V* such that
+Given an *m x n* matrix *A*, we can compute matrices *U, S, V* such that
 
 *A = U * S * V^T*
--
cgit v1.2.3


From 73daa700bd2acff7ff196c9262dffb2d8b9354bf Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sat, 4 Jan 2014 01:52:28 -0800
Subject: add k parameter

---
 docs/mllib-guide.md                                |  5 +++--
 .../scala/org/apache/spark/mllib/linalg/SVD.scala  | 24 +++++++++++-----------
 .../org/apache/spark/mllib/linalg/SVDSuite.scala   |  3 +--
 3 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 711187fbea..abeb55d081 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -251,9 +251,10 @@ val data = sc.textFile("mllib/data/als/test.data").map { line =>
 }
 val m = 4
 val n = 4
+val k = 1
 
-// recover singular vectors for singular values at or above 1e-5
-val (u, s, v) = SVD.sparseSVD(data, m, n, 1e-5)
+// recover largest singular vector
+val (u, s, v) = SVD.sparseSVD(data, m, n, 1)
 
 println("singular values = " + s.toArray.mkString)
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index ac9178e78c..465fc746ed 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -43,9 +43,8 @@ object SVD {
  * Then we compute U via easy matrix multiplication
  * as U = A * V * S^-1
  *
- * Only singular vectors associated with singular values
- * greater or equal to MIN_SVALUE are recovered. If there are k
- * such values, then the dimensions of the return will be:
+ * Only the k largest singular values and associated vectors are found.
+ * If there are k such values, then the dimensions of the return will be:
  *
  * S is k x k and diagonal, holding the singular values on diagonal
  * U is m x k and satisfies U'U = eye(k)
@@ -57,22 +56,22 @@ object SVD {
  * @param data RDD Matrix in sparse 1-index format ((int, int), value)
  * @param m number of rows
  * @param n number of columns
- * @param min_svalue Recover singular values greater or equal to min_svalue
+ * @param k Recover k singular values and vectors
  * @return Three sparse matrices: U, S, V such that A = USV^T
  */
  def sparseSVD(
      data: RDD[MatrixEntry],
      m: Int,
      n: Int,
-     min_svalue: Double)
+     k: Int)
    : SVDecomposedMatrix =
  {
    if (m < n || m <= 0 || n <= 0) {
      throw new IllegalArgumentException("Expecting a tall and skinny matrix")
    }

-    if (min_svalue < 1.0e-8) {
-      throw new IllegalArgumentException("Minimum singular value requested is too small")
+    if (k < 1 || k > n) {
+      throw new IllegalArgumentException("Must request up to n singular values")
    }

    // Compute A^T A, assuming rows are sparse enough to fit in memory
@@ -93,12 +92,13 @@ object SVD {
     // Since A^T A is small, we can compute its SVD directly
     val svd = Singular.sparseSVD(ata)
     val V = svd(0)
-    val sigma = MatrixFunctions.sqrt(svd(1)).toArray.filter(x => x >= min_svalue)
+    val sigmas = MatrixFunctions.sqrt(svd(1)).toArray.filter(x => x > 1e-9)
 
-    // threshold s values
-    if(sigma.isEmpty) {
-      throw new Exception("All singular values are smaller than min_svalue: " + min_svalue)
-    }
+    if(sigmas.size < k) {
+      throw new Exception("Not enough singular values to return")
+    }
+
+    val sigma = sigmas.take(k)
 
     val sc = data.sparkContext
 
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
index 71749ff729..dc4e9239a2 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
@@ -66,9 +66,8 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a,b)=>
       MatrixEntry(a+1,b+1, (a+2).toDouble*(b+1)/(1+a+b)) }.flatten )
-    val min_svalue = 1.0e-8
 
-    val decomposed = SVD.sparseSVD(data, m, n, min_svalue)
+    val decomposed = SVD.sparseSVD(data, m, n, n)
     val u = decomposed.U
     val v = decomposed.V
     val s = decomposed.S
--
cgit v1.2.3


From 746148bc18d5e25ea93f5ff17a6cb4da9b671b75 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sun, 5 Jan 2014 18:03:57 -0800
Subject: fix docs to use SparseMatrix

---
 docs/mllib-guide.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index abeb55d081..653848b6d4 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -243,18 +243,21 @@ as tuples of the form ((i,j),value) all in RDDs. Below is example usage.
 
 import org.apache.spark.SparkContext
 import org.apache.spark.mllib.linalg.SVD
+import org.apache.spark.mllib.linalg.SparseMatrix
+import org.apache.spark.mllib.linalg.MatrixEntry
 
 // Load and parse the data file
 val data = sc.textFile("mllib/data/als/test.data").map { line =>
   val parts = line.split(',')
-  ((parts(0).toInt, parts(1).toInt), parts(2).toDouble)
+  MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
 }
 val m = 4
 val n = 4
 val k = 1
 
 // recover largest singular vector
-val (u, s, v) = SVD.sparseSVD(data, m, n, 1)
+val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), k)
+val = decomposed.S.data
 
 println("singular values = " + s.toArray.mkString)
 
--
cgit v1.2.3


From 4f38b6fab5bf633a205b9039db9d4a26ed28ec89 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Tue, 7 Jan 2014 17:19:28 -0800
Subject: documentation for sparsematrix

---
 docs/mllib-guide.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 653848b6d4..44e6c8f58b 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -228,8 +228,8 @@ from which we recover S and V.
 Then we compute U via easy matrix multiplication
 as *U = A * V * S^-1*
 
-Only singular vectors associated with singular values
-greater or equal to MIN_SVALUE are recovered. If there are k
+Only singular vectors associated with largest k singular values
+are recovered. If there are k
 such values, then the dimensions of the return will be:
 
 * *S* is *k x k* and diagonal, holding the singular values on diagonal.
@@ -237,7 +237,8 @@ such values, then the dimensions of the return will be:
 * *U* is *m x k* and satisfies U^T*U = eye(k).
 * *V* is *n x k* and satisfies V^TV = eye(k).
 
-All input and output is expected in sparse matrix format, 1-indexed
-as tuples of the form ((i,j),value) all in RDDs. Below is example usage.
+All input and output is expected in sparse matrix format, 1-indexed
+as tuples of the form ((i,j),value) all in
+SparseMatrix RDDs. Below is example usage.
 
 {% highlight scala %}
--
cgit v1.2.3


From d28bf4182758f08862d5838c918756801a9d7327 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 13:39:40 -0800
Subject: changes from PR

---
 docs/mllib-guide.md                                |  5 +-
 .../scala/org/apache/spark/examples/SparkSVD.scala | 59 ----------------------
 .../org/apache/spark/examples/mllib/SparkSVD.scala | 59 ++++++++++++++++++++++
 3 files changed, 62 insertions(+), 61 deletions(-)
 delete mode 100644 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
 create mode 100644 examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index a140ecb618..26350ce106 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -445,11 +445,12 @@ Given an *m x n* matrix *A*, we can compute matrices *U, S, V* such that
 
 *A = U * S * V^T*
 
-There is no restriction on m, but we require n^2 doubles to fit in memory.
+There is no restriction on m, but we require n^2 doubles to
+fit in memory locally on one machine.
 Further, n should be less than m.
 
 The decomposition is computed by first computing *A^TA = V S^2 V^T*,
-computing svd locally on that (since n x n is small),
+computing SVD locally on that (since n x n is small),
 from which we recover S and V.
 Then we compute U via easy matrix multiplication
 as *U = A * V * S^-1*
 
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
deleted file mode 100644
index ce7c1c48b5..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples
-
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.linalg.SVD
-import org.apache.spark.mllib.linalg.MatrixEntry
-import org.apache.spark.mllib.linalg.SparseMatrix
-
-/**
- * Compute SVD of an example matrix
- * Input file should be comma separated, 1 indexed of the form
- * i,j,value
- * Where i is the column, j the row, and value is the matrix entry
- *
- * For example input file, see:
- * mllib/data/als/test.data (example is 4 x 4)
- */
-object SparkSVD {
-  def main(args: Array[String]) {
-    if (args.length != 4) {
-      System.err.println("Usage: SparkSVD <master> <file> m n")
-      System.exit(1)
-    }
-    val sc = new SparkContext(args(0), "SVD",
-      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
-
-    // Load and parse the data file
-    val data = sc.textFile(args(1)).map { line =>
-      val parts = line.split(',')
-      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
-    }
-    val m = args(2).toInt
-    val n = args(3).toInt
-
-    // recover largest singular vector
-    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
-    val u = decomposed.U.data
-    val s = decomposed.S.data
-    val v = decomposed.V.data
-
-    println("singular values = " + s.toArray.mkString)
-  }
-}
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
new file mode 100644
index 0000000000..50e5f5bd87
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.examples.mllib + +import org.apache.spark.SparkContext +import org.apache.spark.mllib.linalg.SVD +import org.apache.spark.mllib.linalg.MatrixEntry +import org.apache.spark.mllib.linalg.SparseMatrix + +/** + * Compute SVD of an example matrix + * Input file should be comma separated, 1 indexed of the form + * i,j,value + * Where i is the column, j the row, and value is the matrix entry + * + * For example input file, see: + * mllib/data/als/test.data (example is 4 x 4) + */ +object SparkSVD { + def main(args: Array[String]) { + if (args.length != 4) { + System.err.println("Usage: SparkSVD m n") + System.exit(1) + } + val sc = new SparkContext(args(0), "SVD", + System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR"))) + + // Load and parse the data file + val data = sc.textFile(args(1)).map { line => + val parts = line.split(',') + MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) + } + val m = args(2).toInt + val n = args(3).toInt + + // recover largest singular vector + val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1) + val u = decomposed.U.data + val s = decomposed.S.data + val v = decomposed.V.data + + println("singular values = " + s.toArray.mkString) + } +} -- cgit v1.2.3 From cb13b15a60ce8eb55b2d2971a57ac8d4bd5c7574 Mon Sep 17 00:00:00 2001 From: Reza Zadeh Date: Fri, 17 Jan 2014 13:55:42 -0800 Subject: use 0-indexing --- docs/mllib-guide.md | 4 +-- .../apache/spark/mllib/linalg/MatrixEntry.scala | 4 +-- .../org/apache/spark/mllib/linalg/MatrixSVD.scala | 29 ++++++++++++++++++++++ .../scala/org/apache/spark/mllib/linalg/SVD.scala | 12 ++++----- .../spark/mllib/linalg/SVDecomposedMatrix.scala | 29 ---------------------- .../org/apache/spark/mllib/linalg/SVDSuite.scala | 8 +++--- 6 files changed, 43 insertions(+), 43 deletions(-) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala delete mode 100644 mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala (limited to 'docs') diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index 26350ce106..89ac64a086 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -476,8 +476,8 @@ import org.apache.spark.mllib.linalg.MatrixEntry // Load and parse the data file val data = sc.textFile("mllib/data/als/test.data").map { line => - val parts = line.split(',') - MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) + val parts = line.split(',') + MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) } val m = 4 val n = 4 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala index c7f2abab97..416996fcbe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala @@ -20,8 +20,8 @@ package org.apache.spark.mllib.linalg /** * Class that represents an entry in a sparse matrix of doubles. 
  *
- * @param i row index (1 indexing used)
- * @param j column index (1 indexing used)
+ * @param i row index (0 indexing used)
+ * @param j column index (0 indexing used)
  * @param mval value of entry in matrix
  */
 case class MatrixEntry(val i: Int, val j: Int, val mval: Double)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala
new file mode 100644
index 0000000000..622003576d
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.linalg
+
+/**
+ * Class that represents the SV decomposition of a matrix
+ *
+ * @param U such that A = USV^T
+ * @param S such that A = USV^T
+ * @param V such that A = USV^T
+ */
+case class SVDecomposedMatrix(val U: SparseMatrix,
+                              val S: SparseMatrix,
+                              val V: SparseMatrix)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index 6590e8f357..ba7a0fde77 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -49,7 +49,7 @@ class SVD {
 
 /**
  * Top-level methods for calling Singular Value Decomposition
- * NOTE: All matrices are in 1-indexed sparse format RDD[((int, int), value)]
+ * NOTE: All matrices are in 0-indexed sparse format RDD[((int, int), value)]
  */
 object SVD {
 /**
@@ -73,7 +73,7 @@ object SVD {
  * U is m x k and satisfies U'U = eye(k)
  * V is n x k and satisfies V'V = eye(k)
  *
- * All input and output is expected in sparse matrix format, 1-indexed
+ * All input and output is expected in sparse matrix format, 0-indexed
 * as tuples of the form ((i,j),value) all in RDDs using the
 * SparseMatrix class
 *
@@ -110,7 +110,7 @@ object SVD {
     // Construct jblas A^T A locally
     val ata = DoubleMatrix.zeros(n, n)
     for (entry <- emits.toArray) {
-      ata.put(entry._1._1 - 1, entry._1._2 - 1, entry._2)
+      ata.put(entry._1._1, entry._1._2, entry._2)
     }
 
     // Since A^T A is small, we can compute its SVD directly
@@ -129,18 +129,18 @@ object SVD {
 
     // prepare V for returning
     val retVdata = sc.makeRDD(
       Array.tabulate(V.rows, sigma.length){ (i,j) =>
-        MatrixEntry(i + 1, j + 1, V.get(i,j)) }.flatten)
+        MatrixEntry(i, j, V.get(i,j)) }.flatten)
     val retV = SparseMatrix(retVdata, V.rows, sigma.length)
 
     val retSdata = sc.makeRDD(Array.tabulate(sigma.length){
-      x => MatrixEntry(x + 1, x + 1, sigma(x))})
+      x => MatrixEntry(x, x, sigma(x))})
     val retS = SparseMatrix(retSdata, sigma.length, sigma.length)
 
     // Compute U as U = A V S^-1
     // turn V S^-1 into an RDD as a sparse matrix
     val vsirdd = sc.makeRDD(Array.tabulate(V.rows, sigma.length)
-      { (i,j) => ((i + 1, j + 1), V.get(i,j) / sigma(j)) }.flatten)
+      { (i,j) => ((i, j), V.get(i,j) / sigma(j)) }.flatten)
 
     // Multiply A by VS^-1
     val aCols = data.map(entry => (entry.j, (entry.i, entry.mval)))
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
deleted file mode 100644
index 622003576d..0000000000
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.mllib.linalg
-
-/**
- * Class that represents the SV decomposition of a matrix
- *
- * @param U such that A = USV^T
- * @param S such that A = USV^T
- * @param V such that A = USV^T
- */
-case class SVDecomposedMatrix(val U: SparseMatrix,
-                              val S: SparseMatrix,
-                              val V: SparseMatrix)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
index f239e8505f..12b3801722 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
@@ -50,7 +50,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = matrix.m
     val n = matrix.n
     val ret = DoubleMatrix.zeros(m, n)
-    matrix.data.toArray.map(x => ret.put(x.i - 1, x.j - 1, x.mval))
+    matrix.data.toArray.map(x => ret.put(x.i, x.j, x.mval))
     ret
   }
 
@@ -68,7 +68,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = 10
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
-      MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten )
+      MatrixEntry(a, b, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten )
 
     val a = SparseMatrix(data, m, n)
 
@@ -97,7 +97,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = 10
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m, n){ (a,b) =>
-      MatrixEntry(a + 1, b + 1, 1.0) }.flatten )
+      MatrixEntry(a, b, 1.0) }.flatten )
     val k = 1
 
     val a = SparseMatrix(data, m, n)
@@ -130,7 +130,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = 10
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
-      MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten )
+      MatrixEntry(a, b, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten )
     val a = SparseMatrix(data, m, n)
 
     val k = 1 // only one svalue above this
--
cgit v1.2.3


From 5c639d70df3da48bb52841aa57074ec151bb61cf Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 14:31:39 -0800
Subject: 0index docs

---
 docs/mllib-guide.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 89ac64a086..5be8ce1ebe 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -463,7 +463,7 @@ such values, then the dimensions of the return will be:
 * *U* is *m x k* and satisfies U^T*U = eye(k).
 * *V* is *n x k* and satisfies V^TV = eye(k).
 
-All input and output is expected in sparse matrix format, 1-indexed
+All input and output is expected in sparse matrix format, 0-indexed
 as tuples of the form ((i,j),value) all in
 SparseMatrix RDDs. Below is example usage.
--
cgit v1.2.3
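
Taken together, the patches above leave the guide documenting a tall-and-skinny SVD whose input is 0-indexed MatrixEntry records wrapped in a SparseMatrix, decomposed with SVD.sparseSVD(matrix, k). The following is a minimal self-contained sketch of that final documented usage, not part of the patches themselves: it assumes the SVD, SparseMatrix and MatrixEntry classes exactly as they appear above, the object name, local master URL and output formatting are illustrative placeholders, and where the patched guide reads "val = decomposed.S.data" this sketch presumes the intended binding is "val s = decomposed.S.data".

import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{MatrixEntry, SparseMatrix, SVD}

object TallSkinnySVDSketch {
  def main(args: Array[String]) {
    // Local master URL and application name are placeholders for this sketch.
    val sc = new SparkContext("local", "TallSkinnySVDSketch")

    // Parse a comma-separated file of "i,j,value" entries, 0-indexed as in the final docs.
    val data = sc.textFile("mllib/data/als/test.data").map { line =>
      val parts = line.split(',')
      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
    }
    val m = 4 // number of rows of A
    val n = 4 // number of columns of A (n^2 doubles must fit in memory, n <= m)
    val k = 1 // number of singular values/vectors to recover

    // Decompose A (m x n) into U (m x k), S (k x k diagonal), V (n x k).
    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), k)
    val u = decomposed.U.data // RDD[MatrixEntry] holding U
    val s = decomposed.S.data // RDD[MatrixEntry] holding the diagonal of S
    val v = decomposed.V.data // RDD[MatrixEntry] holding V

    println("singular values = " + s.map(_.mval).collect().mkString(", "))
    sc.stop()
  }
}

For a command-line variant of the same usage, see the SparkSVD example added under examples/src/main/scala/org/apache/spark/examples/mllib in the "changes from PR" commit above.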