From dd0d3f008b5dd478fdfb6d20c53713ca0c7c2be1 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 19:53:04 -0800
Subject: New documentation

---
 docs/mllib-guide.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index c1ff9c417c..8c86369ae6 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -210,3 +210,55 @@ at each iteration.
 Available algorithms for gradient descent:
 
 * [GradientDescent](api/mllib/index.html#org.apache.spark.mllib.optimization.GradientDescent)
+
+
+
+# Singular Value Decomposition
+Singular Value Decomposition for Tall and Skinny matrices.
+Given an m x n matrix A, this will compute matrices U, S, V such that
+A = U * S * V^T
+
+There is no restriction on m, but we require n^2 doubles to fit in memory.
+Further, n should be less than m.
+
+The decomposition is computed by first computing A^TA = V S^2 V^T,
+computing svd locally on that (since n x n is small),
+from which we recover S and V.
+Then we compute U via easy matrix multiplication
+as U = A * V * S^-1
+
+Only singular vectors associated with singular values
+greater or equal to MIN_SVALUE are recovered. If there are k
+such values, then the dimensions of the return will be:
+
+S is k x k and diagonal, holding the singular values on diagonal
+U is m x k and satisfies U^T*U = eye(k)
+V is n x k and satisfies V^TV = eye(k)
+
+All input and output is expected in sparse matrix format, 1-indexed
+as tuples of the form ((i,j),value) all in RDDs
+
+{% highlight scala %}
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.linalg.SVD
+
+// Load and parse the data file
+val data = sc.textFile("mllib/data/als/test.data").map { line =>
+  val parts = line.split(',')
+  ((parts(0).toInt, parts(1).toInt), parts(2).toDouble)
+}
+val m = 4
+val n = 4
+
+// recover singular vectors for singular values at or above 1e-5
+val (u, s, v) = SVD.sparseSVD(data, m, n, 1e-5)
+
+println("singular values = " + s.toArray.mkString)
+
+{% endhighlight %}
+
+
+
+
+
--
cgit v1.2.3


From b941b6f7b0131b4382b09740d56916574901fd55 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 20:01:13 -0800
Subject: doc tweaks

---
 docs/mllib-guide.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 8c86369ae6..08d6d74853 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -231,12 +231,12 @@ Only singular vectors associated with singular values
 greater or equal to MIN_SVALUE are recovered. If there are k
 such values, then the dimensions of the return will be:
 
-S is k x k and diagonal, holding the singular values on diagonal
-U is m x k and satisfies U^T*U = eye(k)
-V is n x k and satisfies V^TV = eye(k)
+* *S* is *k x k* and diagonal, holding the singular values on diagonal.
+* *U* is *m x k* and satisfies U^T*U = eye(k).
+* *V* is *n x k* and satisfies V^TV = eye(k).
 
 All input and output is expected in sparse matrix format, 1-indexed
-as tuples of the form ((i,j),value) all in RDDs
+as tuples of the form ((i,j),value) all in RDDs. Below is example usage.
 {% highlight scala %}
 
--
cgit v1.2.3


From 97dc527849b836703811acdbd6767685585099df Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 20:02:37 -0800
Subject: doc tweak

---
 docs/mllib-guide.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 08d6d74853..8c490eba69 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -215,17 +215,18 @@ Available algorithms for gradient descent:
 
 # Singular Value Decomposition
 Singular Value Decomposition for Tall and Skinny matrices.
-Given an m x n matrix A, this will compute matrices U, S, V such that
-A = U * S * V^T
+Given an *m x n* matrix *A*, this will compute matrices *U, S, V* such that
+
+*A = U * S * V^T*
 
 There is no restriction on m, but we require n^2 doubles to fit in memory.
 Further, n should be less than m.
 
-The decomposition is computed by first computing A^TA = V S^2 V^T,
+The decomposition is computed by first computing *A^TA = V S^2 V^T*,
 computing svd locally on that (since n x n is small),
 from which we recover S and V.
 Then we compute U via easy matrix multiplication
-as U = A * V * S^-1
+as *U = A * V * S^-1*
 
 Only singular vectors associated with singular values
 greater or equal to MIN_SVALUE are recovered. If there are k
--
cgit v1.2.3


From 53ccf65362d935f89fb9e27b4a3485454fa4c882 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Wed, 1 Jan 2014 20:03:47 -0800
Subject: doc tweaks

---
 docs/mllib-guide.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 8c490eba69..711187fbea 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -215,7 +215,7 @@ Available algorithms for gradient descent:
 
 # Singular Value Decomposition
 Singular Value Decomposition for Tall and Skinny matrices.
-Given an *m x n* matrix *A*, this will compute matrices *U, S, V* such that
+Given an *m x n* matrix *A*, we can compute matrices *U, S, V* such that
 
 *A = U * S * V^T*
--
cgit v1.2.3


From 73daa700bd2acff7ff196c9262dffb2d8b9354bf Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sat, 4 Jan 2014 01:52:28 -0800
Subject: add k parameter

---
 docs/mllib-guide.md                                |  5 +++--
 .../scala/org/apache/spark/mllib/linalg/SVD.scala  | 24 +++++++++++-----------
 .../org/apache/spark/mllib/linalg/SVDSuite.scala   |  3 +--
 3 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 711187fbea..abeb55d081 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -251,9 +251,10 @@ val data = sc.textFile("mllib/data/als/test.data").map { line =>
 }
 val m = 4
 val n = 4
+val k = 1
 
-// recover singular vectors for singular values at or above 1e-5
-val (u, s, v) = SVD.sparseSVD(data, m, n, 1e-5)
+// recover largest singular vector
+val (u, s, v) = SVD.sparseSVD(data, m, n, 1)
 
 println("singular values = " + s.toArray.mkString)
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index ac9178e78c..465fc746ed 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -43,9 +43,8 @@ object SVD {
  * Then we compute U via easy matrix multiplication
  * as U = A * V * S^-1
  *
- * Only singular vectors associated with singular values
- * greater or equal to MIN_SVALUE are recovered. If there are k
- * such values, then the dimensions of the return will be:
+ * Only the k largest singular values and associated vectors are found.
+ * If there are k such values, then the dimensions of the return will be:
  *
  * S is k x k and diagonal, holding the singular values on diagonal
  * U is m x k and satisfies U'U = eye(k)
@@ -57,22 +56,22 @@ object SVD {
  * @param data RDD Matrix in sparse 1-index format ((int, int), value)
  * @param m number of rows
  * @param n number of columns
- * @param min_svalue Recover singular values greater or equal to min_svalue
+ * @param k Recover k singular values and vectors
  * @return Three sparse matrices: U, S, V such that A = USV^T
  */
  def sparseSVD(
      data: RDD[MatrixEntry],
      m: Int,
      n: Int,
-     min_svalue: Double)
+     k: Int)
    : SVDecomposedMatrix =
  {
    if (m < n || m <= 0 || n <= 0) {
      throw new IllegalArgumentException("Expecting a tall and skinny matrix")
    }

-    if (min_svalue < 1.0e-8) {
-      throw new IllegalArgumentException("Minimum singular value requested is too small")
+    if (k < 1 || k > n) {
+      throw new IllegalArgumentException("Must request up to n singular values")
    }

    // Compute A^T A, assuming rows are sparse enough to fit in memory
@@ -93,12 +92,13 @@ object SVD {
     // Since A^T A is small, we can compute its SVD directly
     val svd = Singular.sparseSVD(ata)
     val V = svd(0)
-    val sigma = MatrixFunctions.sqrt(svd(1)).toArray.filter(x => x >= min_svalue)
+    val sigmas = MatrixFunctions.sqrt(svd(1)).toArray.filter(x => x > 1e-9)
 
-    // threshold s values
-    if(sigma.isEmpty) {
-      throw new Exception("All singular values are smaller than min_svalue: " + min_svalue)
-    }
+    if(sigmas.size < k) {
+      throw new Exception("Not enough singular values to return")
+    }
+
+    val sigma = sigmas.take(k)
 
     val sc = data.sparkContext
 
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
index 71749ff729..dc4e9239a2 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
@@ -66,9 +66,8 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a,b)=>
       MatrixEntry(a+1,b+1, (a+2).toDouble*(b+1)/(1+a+b)) }.flatten )
-    val min_svalue = 1.0e-8
 
-    val decomposed = SVD.sparseSVD(data, m, n, min_svalue)
+    val decomposed = SVD.sparseSVD(data, m, n, n)
     val u = decomposed.U
     val v = decomposed.V
     val s = decomposed.S
--
cgit v1.2.3


From 746148bc18d5e25ea93f5ff17a6cb4da9b671b75 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sun, 5 Jan 2014 18:03:57 -0800
Subject: fix docs to use SparseMatrix

---
 docs/mllib-guide.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index abeb55d081..653848b6d4 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -243,18 +243,21 @@ as tuples of the form ((i,j),value) all in RDDs. Below is example usage.
 
 import org.apache.spark.SparkContext
 import org.apache.spark.mllib.linalg.SVD
+import org.apache.spark.mllib.linalg.SparseMatrix
+import org.apache.spark.mllib.linalg.MatrixEntry
 
 // Load and parse the data file
 val data = sc.textFile("mllib/data/als/test.data").map { line =>
   val parts = line.split(',')
-  ((parts(0).toInt, parts(1).toInt), parts(2).toDouble)
+  MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
 }
 val m = 4
 val n = 4
 val k = 1
 
 // recover largest singular vector
-val (u, s, v) = SVD.sparseSVD(data, m, n, 1)
+val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), k)
+val = decomposed.S.data
 
 println("singular values = " + s.toArray.mkString)
 
--
cgit v1.2.3


From 4f38b6fab5bf633a205b9039db9d4a26ed28ec89 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Tue, 7 Jan 2014 17:19:28 -0800
Subject: documentation for sparsematrix

---
 docs/mllib-guide.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 653848b6d4..44e6c8f58b 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -228,8 +228,8 @@ from which we recover S and V.
 Then we compute U via easy matrix multiplication
 as *U = A * V * S^-1*
 
-Only singular vectors associated with singular values
-greater or equal to MIN_SVALUE are recovered. If there are k
+Only singular vectors associated with largest k singular values
+are recovered. If there are k
 such values, then the dimensions of the return will be:
 
 * *S* is *k x k* and diagonal, holding the singular values on diagonal.
@@ -237,7 +237,8 @@ such values, then the dimensions of the return will be:
 * *U* is *m x k* and satisfies U^T*U = eye(k).
 * *V* is *n x k* and satisfies V^TV = eye(k).
 
-All input and output is expected in sparse matrix format, 1-indexed
-as tuples of the form ((i,j),value) all in RDDs. Below is example usage.
+All input and output is expected in sparse matrix format, 1-indexed
+as tuples of the form ((i,j),value) all in
+SparseMatrix RDDs. Below is example usage.
 
 {% highlight scala %}
--
cgit v1.2.3


From d28bf4182758f08862d5838c918756801a9d7327 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 13:39:40 -0800
Subject: changes from PR

---
 docs/mllib-guide.md                                |  5 +-
 .../scala/org/apache/spark/examples/SparkSVD.scala | 59 ----------------------
 .../org/apache/spark/examples/mllib/SparkSVD.scala | 59 ++++++++++++++++++++++
 3 files changed, 62 insertions(+), 61 deletions(-)
 delete mode 100644 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
 create mode 100644 examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index a140ecb618..26350ce106 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -445,11 +445,12 @@ Given an *m x n* matrix *A*, we can compute matrices *U, S, V* such that
 
 *A = U * S * V^T*
 
-There is no restriction on m, but we require n^2 doubles to fit in memory.
+There is no restriction on m, but we require n^2 doubles to
+fit in memory locally on one machine.
 Further, n should be less than m.
 
 The decomposition is computed by first computing *A^TA = V S^2 V^T*,
-computing svd locally on that (since n x n is small),
+computing SVD locally on that (since n x n is small),
 from which we recover S and V.
 Then we compute U via easy matrix multiplication
 as *U = A * V * S^-1*
 
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
deleted file mode 100644
index ce7c1c48b5..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples
-
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.linalg.SVD
-import org.apache.spark.mllib.linalg.MatrixEntry
-import org.apache.spark.mllib.linalg.SparseMatrix
-
-/**
- * Compute SVD of an example matrix
- * Input file should be comma separated, 1 indexed of the form
- * i,j,value
- * Where i is the column, j the row, and value is the matrix entry
- *
- * For example input file, see:
- * mllib/data/als/test.data (example is 4 x 4)
- */
-object SparkSVD {
-  def main(args: Array[String]) {
-    if (args.length != 4) {
-      System.err.println("Usage: SparkSVD <master> <file> m n")
-      System.exit(1)
-    }
-    val sc = new SparkContext(args(0), "SVD",
-      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
-
-    // Load and parse the data file
-    val data = sc.textFile(args(1)).map { line =>
-      val parts = line.split(',')
-      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
-    }
-    val m = args(2).toInt
-    val n = args(3).toInt
-
-    // recover largest singular vector
-    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
-    val u = decomposed.U.data
-    val s = decomposed.S.data
-    val v = decomposed.V.data
-
-    println("singular values = " + s.toArray.mkString)
-  }
-}
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
new file mode 100644
index 0000000000..50e5f5bd87
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.examples.mllib + +import org.apache.spark.SparkContext +import org.apache.spark.mllib.linalg.SVD +import org.apache.spark.mllib.linalg.MatrixEntry +import org.apache.spark.mllib.linalg.SparseMatrix + +/** + * Compute SVD of an example matrix + * Input file should be comma separated, 1 indexed of the form + * i,j,value + * Where i is the column, j the row, and value is the matrix entry + * + * For example input file, see: + * mllib/data/als/test.data (example is 4 x 4) + */ +object SparkSVD { + def main(args: Array[String]) { + if (args.length != 4) { + System.err.println("Usage: SparkSVD m n") + System.exit(1) + } + val sc = new SparkContext(args(0), "SVD", + System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR"))) + + // Load and parse the data file + val data = sc.textFile(args(1)).map { line => + val parts = line.split(',') + MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) + } + val m = args(2).toInt + val n = args(3).toInt + + // recover largest singular vector + val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1) + val u = decomposed.U.data + val s = decomposed.S.data + val v = decomposed.V.data + + println("singular values = " + s.toArray.mkString) + } +} -- cgit v1.2.3 From cb13b15a60ce8eb55b2d2971a57ac8d4bd5c7574 Mon Sep 17 00:00:00 2001 From: Reza Zadeh Date: Fri, 17 Jan 2014 13:55:42 -0800 Subject: use 0-indexing --- docs/mllib-guide.md | 4 +-- .../apache/spark/mllib/linalg/MatrixEntry.scala | 4 +-- .../org/apache/spark/mllib/linalg/MatrixSVD.scala | 29 ++++++++++++++++++++++ .../scala/org/apache/spark/mllib/linalg/SVD.scala | 12 ++++----- .../spark/mllib/linalg/SVDecomposedMatrix.scala | 29 ---------------------- .../org/apache/spark/mllib/linalg/SVDSuite.scala | 8 +++--- 6 files changed, 43 insertions(+), 43 deletions(-) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala delete mode 100644 mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala (limited to 'docs') diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index 26350ce106..89ac64a086 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -476,8 +476,8 @@ import org.apache.spark.mllib.linalg.MatrixEntry // Load and parse the data file val data = sc.textFile("mllib/data/als/test.data").map { line => - val parts = line.split(',') - MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) + val parts = line.split(',') + MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) } val m = 4 val n = 4 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala index c7f2abab97..416996fcbe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala @@ -20,8 +20,8 @@ package org.apache.spark.mllib.linalg /** * Class that represents an entry in a sparse matrix of doubles. 
  *
- * @param i row index (1 indexing used)
- * @param j column index (1 indexing used)
+ * @param i row index (0 indexing used)
+ * @param j column index (0 indexing used)
  * @param mval value of entry in matrix
  */
 case class MatrixEntry(val i: Int, val j: Int, val mval: Double)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala
new file mode 100644
index 0000000000..622003576d
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.linalg
+
+/**
+ * Class that represents the SV decomposition of a matrix
+ *
+ * @param U such that A = USV^T
+ * @param S such that A = USV^T
+ * @param V such that A = USV^T
+ */
+case class SVDecomposedMatrix(val U: SparseMatrix,
+                              val S: SparseMatrix,
+                              val V: SparseMatrix)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index 6590e8f357..ba7a0fde77 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -49,7 +49,7 @@ class SVD {
 
 /**
  * Top-level methods for calling Singular Value Decomposition
- * NOTE: All matrices are in 1-indexed sparse format RDD[((int, int), value)]
+ * NOTE: All matrices are in 0-indexed sparse format RDD[((int, int), value)]
  */
 object SVD {
 /**
@@ -73,7 +73,7 @@ object SVD {
  * U is m x k and satisfies U'U = eye(k)
  * V is n x k and satisfies V'V = eye(k)
  *
- * All input and output is expected in sparse matrix format, 1-indexed
+ * All input and output is expected in sparse matrix format, 0-indexed
 * as tuples of the form ((i,j),value) all in RDDs using the
 * SparseMatrix class
 *
@@ -110,7 +110,7 @@ object SVD {
     // Construct jblas A^T A locally
     val ata = DoubleMatrix.zeros(n, n)
     for (entry <- emits.toArray) {
-      ata.put(entry._1._1 - 1, entry._1._2 - 1, entry._2)
+      ata.put(entry._1._1, entry._1._2, entry._2)
     }
 
     // Since A^T A is small, we can compute its SVD directly
@@ -129,18 +129,18 @@ object SVD {
 
     // prepare V for returning
     val retVdata = sc.makeRDD(
       Array.tabulate(V.rows, sigma.length){ (i,j) =>
-        MatrixEntry(i + 1, j + 1, V.get(i,j)) }.flatten)
+        MatrixEntry(i, j, V.get(i,j)) }.flatten)
     val retV = SparseMatrix(retVdata, V.rows, sigma.length)
 
     val retSdata = sc.makeRDD(Array.tabulate(sigma.length){
-      x => MatrixEntry(x + 1, x + 1, sigma(x))})
+      x => MatrixEntry(x, x, sigma(x))})
     val retS = SparseMatrix(retSdata, sigma.length, sigma.length)
 
     // Compute U as U = A V S^-1
     // turn V S^-1 into an RDD as a sparse matrix
     val vsirdd = sc.makeRDD(Array.tabulate(V.rows, sigma.length)
-      { (i,j) => ((i + 1, j + 1), V.get(i,j) / sigma(j)) }.flatten)
+      { (i,j) => ((i, j), V.get(i,j) / sigma(j)) }.flatten)
 
     // Multiply A by VS^-1
     val aCols = data.map(entry => (entry.j, (entry.i, entry.mval)))
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
deleted file mode 100644
index 622003576d..0000000000
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.mllib.linalg
-
-/**
- * Class that represents the SV decomposition of a matrix
- *
- * @param U such that A = USV^T
- * @param S such that A = USV^T
- * @param V such that A = USV^T
- */
-case class SVDecomposedMatrix(val U: SparseMatrix,
-                              val S: SparseMatrix,
-                              val V: SparseMatrix)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
index f239e8505f..12b3801722 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
@@ -50,7 +50,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = matrix.m
     val n = matrix.n
     val ret = DoubleMatrix.zeros(m, n)
-    matrix.data.toArray.map(x => ret.put(x.i - 1, x.j - 1, x.mval))
+    matrix.data.toArray.map(x => ret.put(x.i, x.j, x.mval))
     ret
   }
 
@@ -68,7 +68,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = 10
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
-      MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten )
+      MatrixEntry(a, b, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten )
 
     val a = SparseMatrix(data, m, n)
 
@@ -97,7 +97,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = 10
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m, n){ (a,b) =>
-      MatrixEntry(a + 1, b + 1, 1.0) }.flatten )
+      MatrixEntry(a, b, 1.0) }.flatten )
     val k = 1
 
     val a = SparseMatrix(data, m, n)
@@ -130,7 +130,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = 10
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
-      MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten )
+      MatrixEntry(a, b, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten )
     val a = SparseMatrix(data, m, n)
 
     val k = 1 // only one svalue above this
--
cgit v1.2.3


From 5c639d70df3da48bb52841aa57074ec151bb61cf Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 14:31:39 -0800
Subject: 0index docs

---
 docs/mllib-guide.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'docs')

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 89ac64a086..5be8ce1ebe 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -463,7 +463,7 @@ such values, then the dimensions of the return will be:
 * *U* is *m x k* and satisfies U^T*U = eye(k).
 * *V* is *n x k* and satisfies V^TV = eye(k).
 
-All input and output is expected in sparse matrix format, 1-indexed
+All input and output is expected in sparse matrix format, 0-indexed
 as tuples of the form ((i,j),value) all in
 SparseMatrix RDDs. Below is example usage.
--
cgit v1.2.3
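
Taken together, the patches above leave the guide documenting a tall-and-skinny SVD whose input is 0-indexed MatrixEntry records wrapped in a SparseMatrix, decomposed with SVD.sparseSVD(matrix, k). The following is a minimal self-contained sketch of that final documented usage, not part of the patches themselves: it assumes the SVD, SparseMatrix and MatrixEntry classes exactly as they appear above, the object name, local master URL and output formatting are illustrative placeholders, and where the patched guide reads "val = decomposed.S.data" this sketch presumes the intended binding is "val s = decomposed.S.data".

import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{MatrixEntry, SparseMatrix, SVD}

object TallSkinnySVDSketch {
  def main(args: Array[String]) {
    // Local master URL and application name are placeholders for this sketch.
    val sc = new SparkContext("local", "TallSkinnySVDSketch")

    // Parse a comma-separated file of "i,j,value" entries, 0-indexed as in the final docs.
    val data = sc.textFile("mllib/data/als/test.data").map { line =>
      val parts = line.split(',')
      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
    }
    val m = 4 // number of rows of A
    val n = 4 // number of columns of A (n^2 doubles must fit in memory, n <= m)
    val k = 1 // number of singular values/vectors to recover

    // Decompose A (m x n) into U (m x k), S (k x k diagonal), V (n x k).
    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), k)
    val u = decomposed.U.data // RDD[MatrixEntry] holding U
    val s = decomposed.S.data // RDD[MatrixEntry] holding the diagonal of S
    val v = decomposed.V.data // RDD[MatrixEntry] holding V

    println("singular values = " + s.map(_.mval).collect().mkString(", "))
    sc.stop()
  }
}

For a command-line variant of the same usage, see the SparkSVD example added under examples/src/main/scala/org/apache/spark/examples/mllib in the "changes from PR" commit above.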