From e9bd6cb51dce9222a5a284cd171b299b0169852b Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sat, 4 Jan 2014 12:33:22 -0800
Subject: new example file

---
 .../scala/org/apache/spark/examples/SparkSVD.scala | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
new file mode 100644
index 0000000000..5590ee728a
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.linalg.SVD
+import org.apache.spark.mllib.linalg.MatrixEntry
+
+/**
+ * Compute SVD of an example matrix
+ * Input file should be comma-separated, 1-indexed, of the form
+ *        i,j,value
+ * where i is the row, j the column, and value is the matrix entry
+ *
+ * For example input file, see:
+ * mllib/data/als/test.data
+ */
+object SparkSVD {
+  def main(args: Array[String]) {
+    if (args.length < 3) {
+      System.err.println("Usage: SVD <master> <file>")
+      System.exit(1)
+    }
+    val sc = new SparkContext(args(0), "SVD",
+      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+
+    // Load and parse the data file
+    val data = sc.textFile(args(1)).map { line =>
+      val parts = line.split(',')
+      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
+    }
+    val m = 4
+    val n = 4
+
+    // recover largest singular vector
+    val decomposed = SVD.sparseSVD(data, m, n, 1)
+    val u = decomposed.U
+    val s = decomposed.S
+    val v = decomposed.V
+
+    println("singular values = " + s.toArray.mkString)
+  }
+}
-- cgit v1.2.3
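The example above consumes 1-indexed `i,j,value` triples, one per line. A minimal, self-contained sketch of that format and of the parsing step (plain Scala, no Spark; the object name and sample data are illustrative, not taken from the commits):

```scala
// Parse "i,j,value" lines (1-indexed) into (row, col, value) triples,
// the same shape the example feeds into MatrixEntry.
object ParseSketch {
  def main(args: Array[String]): Unit = {
    val lines = Seq("1,1,1.0", "1,2,2.0", "2,1,3.0", "2,2,4.0") // a 2 x 2 matrix
    val entries = lines.map { line =>
      val Array(i, j, v) = line.split(',')
      (i.toInt, j.toInt, v.toDouble)
    }
    entries.foreach { case (i, j, v) => println(s"($i, $j) -> $v") }
  }
}
```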
From 06c0f7628a213a08ef5adeab903160b806680acf Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Sat, 4 Jan 2014 14:28:07 -0800
Subject: use SparseMatrix everywhere

---
 .../scala/org/apache/spark/examples/SparkSVD.scala |  9 +--
 .../scala/org/apache/spark/mllib/linalg/SVD.scala  | 67 +++++++---------------
 .../spark/mllib/linalg/SVDecomposedMatrix.scala    |  8 +--
 .../apache/spark/mllib/linalg/SparseMatrix.scala   | 30 ++++++++++
 .../org/apache/spark/mllib/linalg/SVDSuite.scala   | 50 +++++++++-------
 5 files changed, 89 insertions(+), 75 deletions(-)
 create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
index 5590ee728a..4b9e674c68 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -20,6 +20,7 @@ package org.apache.spark.examples
 import org.apache.spark.SparkContext
 import org.apache.spark.mllib.linalg.SVD
 import org.apache.spark.mllib.linalg.MatrixEntry
+import org.apache.spark.mllib.linalg.SparseMatrix
 
 /**
  * Compute SVD of an example matrix
@@ -48,10 +49,10 @@ object SparkSVD {
     val n = 4
 
     // recover largest singular vector
-    val decomposed = SVD.sparseSVD(data, m, n, 1)
-    val u = decomposed.U
-    val s = decomposed.S
-    val v = decomposed.V
+    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
+    val u = decomposed.U.data
+    val s = decomposed.S.data
+    val v = decomposed.V.data
 
     println("singular values = " + s.toArray.mkString)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index 31990b0223..a8efdc787e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -26,11 +26,8 @@ import org.jblas.{DoubleMatrix, Singular, MatrixFunctions}
 
 /**
  * Class used to obtain singular value decompositions
- * @param data Matrix in sparse matrix format
- * @param m number of rows
- * @param n number of columns
  */
-class SVD(var data: RDD[MatrixEntry], var m: Int, var n: Int) {
+class SVD {
   private var k: Int = 1
 
   /**
@@ -41,35 +38,11 @@ class SVD(var data: RDD[MatrixEntry], var m: Int, var n: Int) {
     this
   }
 
-  /**
-   * Set matrix to be used for SVD
-   */
-  def setDatadata(data: RDD[MatrixEntry]): this.type = {
-    this.data = data
-    this
-  }
-
-  /**
-   * Set dimensions of matrix: rows
-   */
-  def setNumRows(m: Int): this.type = {
-    this.m = m
-    this
-  }
-
-  /**
-   * Set dimensions of matrix: columns
-   */
-  def setNumCols(n: Int): this.type = {
-    this.n = n
-    this
-  }
-
   /**
    * Compute SVD using the current set parameters
    */
-  def computeSVD() : SVDecomposedMatrix = {
-    SVD.sparseSVD(data, m, n, k)
+  def computeSVD(matrix: SparseMatrix) : SVDecomposedMatrix = {
+    SVD.sparseSVD(matrix, k)
   }
 }
 
@@ -103,19 +76,19 @@ object SVD {
  * All input and output is expected in sparse matrix format, 1-indexed
  * as tuples of the form ((i,j),value) all in RDDs
  *
- * @param data RDD Matrix in sparse 1-index format ((int, int), value)
- * @param m number of rows
- * @param n number of columns
+ * @param matrix sparse matrix to factorize
  * @param k Recover k singular values and vectors
  * @return Three sparse matrices: U, S, V such that A = USV^T
  */
   def sparseSVD(
-      data: RDD[MatrixEntry],
-      m: Int,
-      n: Int,
+      matrix: SparseMatrix,
       k: Int)
     : SVDecomposedMatrix =
   {
+    val data = matrix.data
+    val m = matrix.m
+    val n = matrix.n
+
     if (m < n || m <= 0 || n <= 0) {
       throw new IllegalArgumentException("Expecting a tall and skinny matrix")
     }
@@ -153,13 +126,16 @@ object SVD {
     val sc = data.sparkContext
 
     // prepare V for returning
-    val retV = sc.makeRDD(
+    val retVdata = sc.makeRDD(
             Array.tabulate(V.rows, sigma.length){ (i,j) =>
                     MatrixEntry(i + 1, j + 1, V.get(i,j)) }.flatten)
-
-    val retS = sc.makeRDD(Array.tabulate(sigma.length){
+    val retV = SparseMatrix(retVdata, V.rows, sigma.length)
+
+    val retSdata = sc.makeRDD(Array.tabulate(sigma.length){
                     x => MatrixEntry(x + 1, x + 1, sigma(x))})
+    val retS = SparseMatrix(retSdata, sigma.length, sigma.length)
 
     // Compute U as U = A V S^-1
     // turn V S^-1 into an RDD as a sparse matrix
     val vsirdd = sc.makeRDD(Array.tabulate(V.rows, sigma.length)
                     { (i,j) => ((i + 1, j + 1), V.get(i,j) / sigma(j)) }.flatten)
 
     // Multiply A by VS^-1
     val aCols = data.map(entry => (entry.j, (entry.i, entry.mval)))
     val bRows = vsirdd.map(entry => (entry._1._1, (entry._1._2, entry._2)))
-    val retU = aCols.join(bRows).map( {case (key, ( (rowInd, rowVal), (colInd, colVal)) )
+    val retUdata = aCols.join(bRows).map( {case (key, ( (rowInd, rowVal), (colInd, colVal)) )
       => ((rowInd, colInd), rowVal*colVal)}).reduceByKey(_+_)
       .map{ case ((row, col), mval) => MatrixEntry(row, col, mval)}
-
+    val retU = SparseMatrix(retUdata, m, sigma.length)
+
     SVDecomposedMatrix(retU, retS, retV)
   }
 
@@ -195,10 +172,10 @@ object SVD {
       MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
     }
 
-    val decomposed = SVD.sparseSVD(data, m, n, k)
-    val u = decomposed.U
-    val s = decomposed.S
-    val v = decomposed.V
+    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), k)
+    val u = decomposed.U.data
+    val s = decomposed.S.data
+    val v = decomposed.V.data
 
     println("Computed " + s.toArray.length + " singular values and vectors")
     u.saveAsTextFile(output_u)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
index e0bcdab2d2..622003576d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.mllib.linalg
 
-import org.apache.spark.rdd.RDD
-
 /**
  * Class that represents the SV decomposition of a matrix
  *
@@ -26,7 +24,7 @@
  * @param U such that A = USV^T
  * @param S such that A = USV^T
  * @param V such that A = USV^T
  */
-case class SVDecomposedMatrix(val U: RDD[MatrixEntry],
-                              val S: RDD[MatrixEntry],
-                              val V: RDD[MatrixEntry])
+case class SVDecomposedMatrix(val U: SparseMatrix,
+                              val S: SparseMatrix,
+                              val V: SparseMatrix)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala
new file mode 100644
index 0000000000..cbd1a2a5a4
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SparseMatrix.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.linalg
+
+import org.apache.spark.rdd.RDD
+
+
+/**
+ * Class that represents a sparse matrix
+ *
+ * @param data RDD of nonzero entries
+ * @param m number of rows
+ * @param n number of columns
+ */
+case class SparseMatrix(val data: RDD[MatrixEntry], val m: Int, val n: Int)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
index 4126e819e3..f239e8505f 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala
@@ -45,9 +45,12 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
   val EPSILON = 1e-4
 
   // Return jblas matrix from sparse matrix RDD
-  def getDenseMatrix(matrix:RDD[MatrixEntry], m:Int, n:Int) : DoubleMatrix = {
+  def getDenseMatrix(matrix:SparseMatrix) : DoubleMatrix = {
+    val data = matrix.data
+    val m = matrix.m
+    val n = matrix.n
     val ret = DoubleMatrix.zeros(m, n)
-    matrix.toArray.map(x => ret.put(x.i - 1, x.j - 1, x.mval))
+    matrix.data.toArray.map(x => ret.put(x.i - 1, x.j - 1, x.mval))
     ret
   }
 
@@ -67,24 +70,26 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
       MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten )
 
-    val decomposed = SVD.sparseSVD(data, m, n, n)
+    val a = SparseMatrix(data, m, n)
+
+    val decomposed = SVD.sparseSVD(a, n)
     val u = decomposed.U
     val v = decomposed.V
-    val s = decomposed.S 
+    val s = decomposed.S
 
-    val densea = getDenseMatrix(data, m, n)
+    val densea = getDenseMatrix(a)
     val svd = Singular.sparseSVD(densea)
 
-    val retu = getDenseMatrix(u, m, n)
-    val rets = getDenseMatrix(s, n, n)
-    val retv = getDenseMatrix(v, n, n)
+    val retu = getDenseMatrix(u)
+    val rets = getDenseMatrix(s)
+    val retv = getDenseMatrix(v)
 
     // check individual decomposition
     assertMatrixEquals(retu, svd(0))
     assertMatrixEquals(rets, DoubleMatrix.diag(svd(1)))
     assertMatrixEquals(retv, svd(2))
 
-    // check multiplication guarantee 
+    // check multiplication guarantee
     assertMatrixEquals(retu.mmul(rets).mmul(retv.transpose), densea)
   }
 
@@ -95,20 +100,22 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
       MatrixEntry(a + 1, b + 1, 1.0) }.flatten )
     val k = 1
 
-    val decomposed = SVD.sparseSVD(data, m, n, k)
+    val a = SparseMatrix(data, m, n)
+
+    val decomposed = SVD.sparseSVD(a, k)
     val u = decomposed.U
     val s = decomposed.S
    val v = decomposed.V
-    val retrank = s.toArray.length
+    val retrank = s.data.toArray.length
 
     assert(retrank == 1, "rank returned not one")
 
-    val densea = getDenseMatrix(data, m, n)
+    val densea = getDenseMatrix(a)
    val svd = Singular.sparseSVD(densea)
 
-    val retu = getDenseMatrix(u, m, retrank)
-    val rets = getDenseMatrix(s, retrank, retrank)
-    val retv = getDenseMatrix(v, n, retrank)
+    val retu = getDenseMatrix(u)
+    val rets = getDenseMatrix(s)
+    val retv = getDenseMatrix(v)
 
     // check individual decomposition
     assertMatrixEquals(retu, svd(0).getColumn(0))
 
@@ -124,21 +131,22 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val n = 3
     val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) =>
       MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten )
+    val a = SparseMatrix(data, m, n)
 
     val k = 1 // only one svalue above this
 
-    val decomposed = SVD.sparseSVD(data, m, n, k)
+    val decomposed = SVD.sparseSVD(a, k)
     val u = decomposed.U
     val s = decomposed.S
     val v = decomposed.V
-    val retrank = s.toArray.length
+    val retrank = s.data.toArray.length
 
-    val densea = getDenseMatrix(data, m, n)
+    val densea = getDenseMatrix(a)
     val svd = Singular.sparseSVD(densea)
 
-    val retu = getDenseMatrix(u, m, retrank)
-    val rets = getDenseMatrix(s, retrank, retrank)
-    val retv = getDenseMatrix(v, n, retrank)
+    val retu = getDenseMatrix(u)
+    val rets = getDenseMatrix(s)
+    val retv = getDenseMatrix(v)
 
     assert(retrank == 1, "rank returned not one")
 
-- cgit v1.2.3
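The sparseSVD reworked in this commit follows the tall-and-skinny recipe: take the SVD of the small n x n Gram matrix A^T A = V S^2 V^T to get V and the singular values, then recover U = A V S^-1. A local, self-contained sketch of the same recipe in plain jblas, the library the implementation and its test already use (the object and variable names here are illustrative, not part of MLlib):

```scala
import org.jblas.{DoubleMatrix, Singular}

object TallSkinnySVDSketch {
  def main(args: Array[String]): Unit = {
    // A 3 x 2 matrix: m >= n, the tall-and-skinny shape sparseSVD requires
    val a = new DoubleMatrix(Array(
      Array(1.0, 2.0),
      Array(3.0, 4.0),
      Array(5.0, 6.0)))
    // The n x n Gram matrix is small enough to factor on one machine
    val gram = a.transpose.mmul(a)
    // gram = V * diag(s^2) * V^T, so its SVD yields V and the squared singular values
    val gramSVD = Singular.fullSVD(gram)
    val v = gramSVD(0)
    val sigma = gramSVD(1).toArray.map(math.sqrt)
    // Recover U = A * V * S^-1 (assumes the kept singular values are nonzero)
    val sInv = DoubleMatrix.diag(new DoubleMatrix(sigma.map(1.0 / _)))
    val u = a.mmul(v).mmul(sInv)
    // Sanity check, mirroring the "multiplication guarantee" test: U S V^T == A
    val rebuilt = u.mmul(DoubleMatrix.diag(new DoubleMatrix(sigma))).mmul(v.transpose)
    println("max reconstruction error = " + rebuilt.sub(a).normmax())
  }
}
```

The distributed code does exactly this, except that A, U, S, and V live in RDDs and the final multiplication U = A V S^-1 is expressed as the join-and-reduce over aCols and bRows shown in the diff.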
From cf5bd4ab2e9db72d3d9164053523e9e872d85b94 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Thu, 9 Jan 2014 22:39:41 -0800
Subject: fix example

---
 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
index 4b9e674c68..d9c672f140 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -33,8 +33,8 @@ import org.apache.spark.mllib.linalg.SparseMatrix
  */
 object SparkSVD {
   def main(args: Array[String]) {
-    if (args.length < 3) {
-      System.err.println("Usage: SVD <master> <file>")
+    if (args.length != 2) {
+      System.err.println("Usage: SparkSVD <master> <file>")
       System.exit(1)
     }
     val sc = new SparkContext(args(0), "SVD",
-- cgit v1.2.3

From 1afdeaeb2f436084a6fbe8d73690f148f7b462c4 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 10 Jan 2014 21:30:54 -0800
Subject: add dimension parameters to example

---
 .../src/main/scala/org/apache/spark/examples/SparkSVD.scala | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
index d9c672f140..ce7c1c48b5 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
@@ -29,12 +29,12 @@ import org.apache.spark.mllib.linalg.SparseMatrix
  * where i is the row, j the column, and value is the matrix entry
  *
  * For example input file, see:
- * mllib/data/als/test.data
+ * mllib/data/als/test.data (example is 4 x 4)
  */
 object SparkSVD {
   def main(args: Array[String]) {
-    if (args.length != 2) {
-      System.err.println("Usage: SparkSVD <master> <file>")
+    if (args.length != 4) {
+      System.err.println("Usage: SparkSVD <master> <file> m n")
       System.exit(1)
     }
     val sc = new SparkContext(args(0), "SVD",
@@ -45,8 +45,8 @@ object SparkSVD {
       val parts = line.split(',')
       MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
     }
-    val m = 4
-    val n = 4
+    val m = args(2).toInt
+    val n = args(3).toInt
 
     // recover largest singular vector
     val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
-- cgit v1.2.3
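This commit passes m and n explicitly rather than inferring them. The dimensions could instead be derived from the entries at the cost of an extra pass over the RDD; a hypothetical sketch of that alternative (the helper below is not part of MLlib), which also shows why explicit parameters are the cheaper choice:

```scala
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.MatrixEntry

object DimensionSketch {
  // Hypothetical helper: infer (m, n) from 1-indexed entries in one extra
  // pass over the data. Taking m and n as command-line arguments, as the
  // commit does, avoids this pass entirely.
  def inferDimensions(data: RDD[MatrixEntry]): (Int, Int) =
    data.map(e => (e.i, e.j))
        .reduce((a, b) => (math.max(a._1, b._1), math.max(a._2, b._2)))
}
```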
From d28bf4182758f08862d5838c918756801a9d7327 Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 13:39:40 -0800
Subject: changes from PR

---
 docs/mllib-guide.md                                |  5 +-
 .../scala/org/apache/spark/examples/SparkSVD.scala | 59 ----------------------
 .../org/apache/spark/examples/mllib/SparkSVD.scala | 59 ++++++++++++++++++++++
 3 files changed, 62 insertions(+), 61 deletions(-)
 delete mode 100644 examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
 create mode 100644 examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala

diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index a140ecb618..26350ce106 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -445,11 +445,12 @@ Given
 an *m x n* matrix *A*, we can compute matrices *U, S, V* such that
 
 *A = U * S * V^T*
 
-There is no restriction on m, but we require n^2 doubles to
-fit in memory.
+There is no restriction on m, but we require n^2 doubles to
+fit in memory locally on one machine.
+Further, n should be less than m.
 
 The decomposition is computed by first computing *A^TA = V S^2 V^T*,
-computing svd locally on that (since n x n is small),
+computing SVD locally on that (since n x n is small),
 from which we recover S and V.
 Then we compute U via easy matrix multiplication as *U = A * V * S^-1*
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
deleted file mode 100644
index ce7c1c48b5..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/SparkSVD.scala
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples
-
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.linalg.SVD
-import org.apache.spark.mllib.linalg.MatrixEntry
-import org.apache.spark.mllib.linalg.SparseMatrix
-
-/**
- * Compute SVD of an example matrix
- * Input file should be comma-separated, 1-indexed, of the form
- *        i,j,value
- * where i is the row, j the column, and value is the matrix entry
- *
- * For example input file, see:
- * mllib/data/als/test.data (example is 4 x 4)
- */
-object SparkSVD {
-  def main(args: Array[String]) {
-    if (args.length != 4) {
-      System.err.println("Usage: SparkSVD <master> <file> m n")
-      System.exit(1)
-    }
-    val sc = new SparkContext(args(0), "SVD",
-      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
-
-    // Load and parse the data file
-    val data = sc.textFile(args(1)).map { line =>
-      val parts = line.split(',')
-      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
-    }
-    val m = args(2).toInt
-    val n = args(3).toInt
-
-    // recover largest singular vector
-    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
-    val u = decomposed.U.data
-    val s = decomposed.S.data
-    val v = decomposed.V.data
-
-    println("singular values = " + s.toArray.mkString)
-  }
-}
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
new file mode 100644
index 0000000000..50e5f5bd87
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.linalg.SVD
+import org.apache.spark.mllib.linalg.MatrixEntry
+import org.apache.spark.mllib.linalg.SparseMatrix
+
+/**
+ * Compute SVD of an example matrix
+ * Input file should be comma-separated, 1-indexed, of the form
+ *        i,j,value
+ * where i is the row, j the column, and value is the matrix entry
+ *
+ * For example input file, see:
+ * mllib/data/als/test.data (example is 4 x 4)
+ */
+object SparkSVD {
+  def main(args: Array[String]) {
+    if (args.length != 4) {
+      System.err.println("Usage: SparkSVD <master> <file> m n")
+      System.exit(1)
+    }
+    val sc = new SparkContext(args(0), "SVD",
+      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+
+    // Load and parse the data file
+    val data = sc.textFile(args(1)).map { line =>
+      val parts = line.split(',')
+      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
+    }
+    val m = args(2).toInt
+    val n = args(3).toInt
+
+    // recover largest singular vector
+    val decomposed = SVD.sparseSVD(SparseMatrix(data, m, n), 1)
+    val u = decomposed.U.data
+    val s = decomposed.S.data
+    val v = decomposed.V.data
+
+    println("singular values = " + s.toArray.mkString)
+  }
+}
-- cgit v1.2.3

From 4e96757793e7aee165381f80a60b3f46f60c9ebc Mon Sep 17 00:00:00 2001
From: Reza Zadeh
Date: Fri, 17 Jan 2014 14:33:03 -0800
Subject: make example 0-indexed

---
 examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
index 50e5f5bd87..19676fcc1a 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala
@@ -43,7 +43,7 @@ object SparkSVD {
     // Load and parse the data file
     val data = sc.textFile(args(1)).map { line =>
       val parts = line.split(',')
-      MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble)
+      MatrixEntry(parts(0).toInt - 1, parts(1).toInt - 1, parts(2).toDouble)
     }
     val m = args(2).toInt
     val n = args(3).toInt
-- cgit v1.2.3
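After this last commit the input file stays 1-indexed, but the entries handed to MLlib become 0-indexed. A rough end-to-end sketch of driving the final API directly, without an input file (a local master is assumed, as is that the rest of MLlib's SVD path accepts 0-indexed entries after this change; only the example's side of that move appears in this series, and the object name below is illustrative):

```scala
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{MatrixEntry, SparseMatrix, SVD}

object SVDUsageSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "SVDUsageSketch")
    // 0-indexed entries of a 3 x 2 matrix, matching the example's new convention
    val entries = sc.makeRDD(Seq(
      MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 2.0),
      MatrixEntry(1, 0, 3.0), MatrixEntry(1, 1, 4.0),
      MatrixEntry(2, 0, 5.0), MatrixEntry(2, 1, 6.0)))
    // recover the single largest singular value and its vectors
    val decomposed = SVD.sparseSVD(SparseMatrix(entries, 3, 2), 1)
    println("singular values = " + decomposed.S.data.collect().mkString(", "))
    sc.stop()
  }
}
```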