aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main/scala
diff options
context:
space:
mode:
authorReza Zadeh <rizlar@gmail.com>2013-12-26 16:12:40 -0500
committerReza Zadeh <rizlar@gmail.com>2013-12-26 16:12:40 -0500
commit6e740cc90131b29ebc17e32c66ea16727e5dcc9f (patch)
tree95a0ae23a1f69b0880fe561c3d21610e8af9c439 /mllib/src/main/scala
parent1a173f00bd96dcfbfc11866b1f2ad76a50ffc399 (diff)
downloadspark-6e740cc90131b29ebc17e32c66ea16727e5dcc9f.tar.gz
spark-6e740cc90131b29ebc17e32c66ea16727e5dcc9f.tar.bz2
spark-6e740cc90131b29ebc17e32c66ea16727e5dcc9f.zip
Some documentation
Diffstat (limited to 'mllib/src/main/scala')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/linalg/sparsesvd.scala47
1 files changed, 47 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/sparsesvd.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/sparsesvd.scala
index 384bf9d33f..99a1785074 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/sparsesvd.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/sparsesvd.scala
@@ -1,7 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.linalg
+
import org.apache.spark.SparkContext
import org.jblas.{DoubleMatrix, Singular, MatrixFunctions}
+
+/**
+ * Singular Value Decomposition for Tall and Skinny matrices.
+ * Given an m x n matrix A, this will compute matrices U, S, V such that
+ * A = U * S * V'
+ *
+ * There is no restriction on m, but we require n doubles to be held in memory.
+ * Further, n should be less than m.
+ *
+ * This is computed by first computing A'A = V S^2 V',
+ * computing locally on that (since n x n is small),
+ * from which we recover S and V.
+ * Then we compute U via easy matrix multiplication
+ * as U = A * V * S^-1
+ *
+ * Only singular vectors associated with singular values
+ * greater or equal to MIN_SVALUE are recovered. If there are k
+ * such values, then the dimensions of the return will be:
+ *
+ * S is k x k and diagonal, holding the singular values on diagonal
+ * U is m x k and satisfies U'U = eye(k,k)
+ * V is n x k and satisfies V'V = eye(k,k)
+ *
+ * All input and output is expected in sparse matrix format, 1-indexed
+ * as tuples of the form ((i,j),value) all in RDDs
+ */
+
+
// arguments
val MIN_SVALUE = 0.01 // minimum singular value to recover
val m = 100000