aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorYanbo Liang <ybliang8@gmail.com>2015-10-07 15:50:45 -0700
committerXiangrui Meng <meng@databricks.com>2015-10-07 15:50:45 -0700
commit7bf07faa716bd6a01252c5e888d0956096bde026 (patch)
tree9acef1a26d9f429210e499a7f63d28207c025daf /mllib
parent6dbfd7ecf41297213f4ce8024d00c40808c5ac8f (diff)
downloadspark-7bf07faa716bd6a01252c5e888d0956096bde026.tar.gz
spark-7bf07faa716bd6a01252c5e888d0956096bde026.tar.bz2
spark-7bf07faa716bd6a01252c5e888d0956096bde026.zip
[SPARK-10490] [ML] Consolidate the Cholesky solvers in WeightedLeastSquares and ALS
Consolidate the Cholesky solvers in WeightedLeastSquares and ALS. Author: Yanbo Liang <ybliang8@gmail.com> Closes #8936 from yanboliang/spark-10490.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala23
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala10
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala43
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala6
4 files changed, 47 insertions, 35 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
index 4374e99631..d7eaa5a926 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
@@ -17,12 +17,8 @@
package org.apache.spark.ml.optim
-import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
-import org.netlib.util.intW
-
import org.apache.spark.Logging
import org.apache.spark.mllib.linalg._
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.rdd.RDD
/**
@@ -110,7 +106,7 @@ private[ml] class WeightedLeastSquares(
j += 1
}
- val x = choleskySolve(aaBar.values, abBar)
+ val x = new DenseVector(CholeskyDecomposition.solve(aaBar.values, abBar.values))
// compute intercept
val intercept = if (fitIntercept) {
@@ -121,23 +117,6 @@ private[ml] class WeightedLeastSquares(
new WeightedLeastSquaresModel(x, intercept)
}
-
- /**
- * Solves a symmetric positive definite linear system via Cholesky factorization.
- * The input arguments are modified in-place to store the factorization and the solution.
- * @param A the upper triangular part of A
- * @param bx right-hand side
- * @return the solution vector
- */
- // TODO: SPARK-10490 - consolidate this and the Cholesky solver in ALS
- private def choleskySolve(A: Array[Double], bx: DenseVector): DenseVector = {
- val k = bx.size
- val info = new intW(0)
- lapack.dppsv("U", k, 1, A, bx.values, k, info)
- val code = info.`val`
- assert(code == 0, s"lapack.dpotrs returned $code.")
- bx
- }
}
private[ml] object WeightedLeastSquares {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index f6f5281f71..535f266b9a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -26,9 +26,7 @@ import scala.util.Sorting
import scala.util.hashing.byteswap64
import com.github.fommil.netlib.BLAS.{getInstance => blas}
-import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
import org.apache.hadoop.fs.{FileSystem, Path}
-import org.netlib.util.intW
import org.apache.spark.{Logging, Partitioner}
import org.apache.spark.annotation.{DeveloperApi, Experimental}
@@ -36,6 +34,7 @@ import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
+import org.apache.spark.mllib.linalg.CholeskyDecomposition
import org.apache.spark.mllib.optimization.NNLS
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
@@ -366,8 +365,6 @@ object ALS extends Logging {
/** Cholesky solver for least square problems. */
private[recommendation] class CholeskySolver extends LeastSquaresNESolver {
- private val upper = "U"
-
/**
* Solves a least squares problem with L2 regularization:
*
@@ -387,10 +384,7 @@ object ALS extends Logging {
i += j
j += 1
}
- val info = new intW(0)
- lapack.dppsv(upper, k, 1, ne.ata, ne.atb, k, info)
- val code = info.`val`
- assert(code == 0, s"lapack.dppsv returned $code.")
+ CholeskyDecomposition.solve(ne.ata, ne.atb)
val x = new Array[Float](k)
i = 0
while (i < k) {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
new file mode 100644
index 0000000000..66eb40b6f4
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.linalg
+
+import com.github.fommil.netlib.LAPACK.{getInstance => lapack}
+import org.netlib.util.intW
+
+/**
+ * Compute Cholesky decomposition.
+ */
+private[spark] object CholeskyDecomposition {
+
+ /**
+ * Solves a symmetric positive definite linear system via Cholesky factorization.
+ * The input arguments are modified in-place to store the factorization and the solution.
+   * @param A the upper triangular part of the matrix A, in packed column-major storage
+   * @param bx right-hand side vector; overwritten with the solution on return
+   * @return the solution array
+ */
+ def solve(A: Array[Double], bx: Array[Double]): Array[Double] = {
+ val k = bx.size
+ val info = new intW(0)
+ lapack.dppsv("U", k, 1, A, bx, k, info)
+ val code = info.`val`
+    assert(code == 0, s"lapack.dppsv returned $code.")
+ bx
+ }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
index ae3ba3099c..863abe86d3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala
@@ -21,13 +21,9 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import com.github.fommil.netlib.ARPACK
import org.netlib.util.{intW, doubleW}
-import org.apache.spark.annotation.Experimental
-
/**
- * :: Experimental ::
* Compute eigen-decomposition.
*/
-@Experimental
private[mllib] object EigenValueDecomposition {
/**
* Compute the leading k eigenvalues and eigenvectors on a symmetric square matrix using ARPACK.
@@ -46,7 +42,7 @@ private[mllib] object EigenValueDecomposition {
* for more details). The maximum number of Arnoldi update iterations is set to 300 in this
* function.
*/
- private[mllib] def symmetricEigs(
+ def symmetricEigs(
mul: BDV[Double] => BDV[Double],
n: Int,
k: Int,