aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sean Owen <sowen@cloudera.com> 2016-09-21 18:56:16 +0000
committer DB Tsai <dbt@netflix.com> 2016-09-21 18:56:16 +0000
commit b4a4421b610e776e5280fd5e7453f937f806cbd1 (patch)
tree f95410d804db99206608e933e712014faf0fc4c0
parent d7ee12211a99efae6f7395e47089236838461d61 (diff)
download spark-b4a4421b610e776e5280fd5e7453f937f806cbd1.tar.gz
spark-b4a4421b610e776e5280fd5e7453f937f806cbd1.tar.bz2
spark-b4a4421b610e776e5280fd5e7453f937f806cbd1.zip
[SPARK-11918][ML] Better error from WLS for cases like singular input
## What changes were proposed in this pull request?

Update error handling for Cholesky decomposition to provide a little more info when input is singular.

## How was this patch tested?

New test case; Jenkins tests.

Author: Sean Owen <sowen@cloudera.com>

Closes #15177 from srowen/SPARK-11918.
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala 19
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala 20
2 files changed, 35 insertions, 4 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
index e4494792bb..08f8f19c1e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
@@ -36,8 +36,7 @@ private[spark] object CholeskyDecomposition {
val k = bx.length
val info = new intW(0)
lapack.dppsv("U", k, 1, A, bx, k, info)
- val code = info.`val`
- assert(code == 0, s"lapack.dppsv returned $code.")
+ checkReturnValue(info, "dppsv")
bx
}
@@ -52,8 +51,20 @@ private[spark] object CholeskyDecomposition {
def inverse(UAi: Array[Double], k: Int): Array[Double] = {
val info = new intW(0)
lapack.dpptri("U", k, UAi, info)
- val code = info.`val`
- assert(code == 0, s"lapack.dpptri returned $code.")
+ checkReturnValue(info, "dpptri")
UAi
}
+
+ private def checkReturnValue(info: intW, method: String): Unit = {
+ info.`val` match {
+ case code if code < 0 =>
+ throw new IllegalStateException(s"LAPACK.$method returned $code; arg ${-code} is illegal")
+ case code if code > 0 =>
+ throw new IllegalArgumentException(
+ s"LAPACK.$method returned $code because A is not positive definite. Is A derived from " +
+ "a singular matrix (e.g. collinear column values)?")
+ case _ => // do nothing
+ }
+ }
+
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
index c8de796b2d..2cb1af0dee 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
@@ -60,6 +60,26 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
), 2)
}
+ test("two collinear features result in error with no regularization") {
+ val singularInstances = sc.parallelize(Seq(
+ Instance(1.0, 1.0, Vectors.dense(1.0, 2.0)),
+ Instance(2.0, 1.0, Vectors.dense(2.0, 4.0)),
+ Instance(3.0, 1.0, Vectors.dense(3.0, 6.0)),
+ Instance(4.0, 1.0, Vectors.dense(4.0, 8.0))
+ ), 2)
+
+ intercept[IllegalArgumentException] {
+ new WeightedLeastSquares(
+ false, regParam = 0.0, standardizeFeatures = false,
+ standardizeLabel = false).fit(singularInstances)
+ }
+
+ // Should not throw an exception
+ new WeightedLeastSquares(
+ false, regParam = 1.0, standardizeFeatures = false,
+ standardizeLabel = false).fit(singularInstances)
+ }
+
test("WLS against lm") {
/*
R code: