about | summary | refs | log | tree | commit | diff
path: root/mllib/src/main/scala/org
diff options
context:
space:
mode:
author Sean Owen <sowen@cloudera.com> 2015-10-27 23:07:37 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-10-27 23:07:37 -0700
commit 826e1e304b57abbc56b8b7ffd663d53942ab3c7c (patch)
tree 379cecd7931154b2ce835302106139f06af613be /mllib/src/main/scala/org
parent d9c6039897236c3f1e4503aa95c5c9b07b32eadd (diff)
download spark-826e1e304b57abbc56b8b7ffd663d53942ab3c7c.tar.gz
spark-826e1e304b57abbc56b8b7ffd663d53942ab3c7c.tar.bz2
spark-826e1e304b57abbc56b8b7ffd663d53942ab3c7c.zip
[SPARK-11302][MLLIB] 2) Multivariate Gaussian Model with Covariance matrix returns incorrect answer in some cases
Fix computation of root-sigma-inverse in multivariate Gaussian; add a test and fix related Python mixture model test.
Supersedes https://github.com/apache/spark/pull/9293.
Author: Sean Owen <sowen@cloudera.com>
Closes #9309 from srowen/SPARK-11302.2.
Diffstat (limited to 'mllib/src/main/scala/org')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala 8
1 files changed, 4 insertions, 4 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
index 92a5af708d..0724af9308 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
@@ -56,7 +56,7 @@ class MultivariateGaussian @Since("1.3.0") (
/**
* Compute distribution dependent constants:
- * rootSigmaInv = D^(-1/2)^ * U, where sigma = U * D * U.t
+ * rootSigmaInv = D^(-1/2)^ * U.t, where sigma = U * D * U.t
* u = log((2*pi)^(-k/2)^ * det(sigma)^(-1/2)^)
*/
private val (rootSigmaInv: DBM[Double], u: Double) = calculateCovarianceConstants
@@ -104,11 +104,11 @@ class MultivariateGaussian @Since("1.3.0") (
*
* sigma = U * D * U.t
* inv(Sigma) = U * inv(D) * U.t
- * = (D^{-1/2}^ * U).t * (D^{-1/2}^ * U)
+ * = (D^{-1/2}^ * U.t).t * (D^{-1/2}^ * U.t)
*
* and thus
*
- * -0.5 * (x-mu).t * inv(Sigma) * (x-mu) = -0.5 * norm(D^{-1/2}^ * U * (x-mu))^2^
+ * -0.5 * (x-mu).t * inv(Sigma) * (x-mu) = -0.5 * norm(D^{-1/2}^ * U.t * (x-mu))^2^
*
* To guard against singular covariance matrices, this method computes both the
* pseudo-determinant and the pseudo-inverse (Moore-Penrose). Singular values are considered
@@ -130,7 +130,7 @@ class MultivariateGaussian @Since("1.3.0") (
// by inverting the square root of all non-zero values
val pinvS = diag(new DBV(d.map(v => if (v > tol) math.sqrt(1.0 / v) else 0.0).toArray))
- (pinvS * u, -0.5 * (mu.size * math.log(2.0 * math.Pi) + logPseudoDetSigma))
+ (pinvS * u.t, -0.5 * (mu.size * math.log(2.0 * math.Pi) + logPseudoDetSigma))
} catch {
case uex: UnsupportedOperationException =>
throw new IllegalArgumentException("Covariance matrix has no non-zero singular values")