diff options
author | Sean Owen <sowen@cloudera.com> | 2016-09-21 18:56:16 +0000 |
---|---|---|
committer | DB Tsai <dbt@netflix.com> | 2016-09-21 18:56:16 +0000 |
commit | b4a4421b610e776e5280fd5e7453f937f806cbd1 (patch) | |
tree | f95410d804db99206608e933e712014faf0fc4c0 /mllib | |
parent | d7ee12211a99efae6f7395e47089236838461d61 (diff) | |
download | spark-b4a4421b610e776e5280fd5e7453f937f806cbd1.tar.gz spark-b4a4421b610e776e5280fd5e7453f937f806cbd1.tar.bz2 spark-b4a4421b610e776e5280fd5e7453f937f806cbd1.zip |
[SPARK-11918][ML] Better error from WLS for cases like singular input
## What changes were proposed in this pull request?
Update error handling for Cholesky decomposition to provide a little more info when input is singular.
## How was this patch tested?
New test case; jenkins tests.
Author: Sean Owen <sowen@cloudera.com>
Closes #15177 from srowen/SPARK-11918.
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala | 19 | ||||
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala | 20 |
2 files changed, 35 insertions, 4 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala index e4494792bb..08f8f19c1e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala @@ -36,8 +36,7 @@ private[spark] object CholeskyDecomposition { val k = bx.length val info = new intW(0) lapack.dppsv("U", k, 1, A, bx, k, info) - val code = info.`val` - assert(code == 0, s"lapack.dppsv returned $code.") + checkReturnValue(info, "dppsv") bx } @@ -52,8 +51,20 @@ private[spark] object CholeskyDecomposition { def inverse(UAi: Array[Double], k: Int): Array[Double] = { val info = new intW(0) lapack.dpptri("U", k, UAi, info) - val code = info.`val` - assert(code == 0, s"lapack.dpptri returned $code.") + checkReturnValue(info, "dpptri") UAi } + + private def checkReturnValue(info: intW, method: String): Unit = { + info.`val` match { + case code if code < 0 => + throw new IllegalStateException(s"LAPACK.$method returned $code; arg ${-code} is illegal") + case code if code > 0 => + throw new IllegalArgumentException( + s"LAPACK.$method returned $code because A is not positive definite. Is A derived from " + + "a singular matrix (e.g. collinear column values)?") + case _ => // do nothing + } + } + } diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala index c8de796b2d..2cb1af0dee 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala @@ -60,6 +60,26 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext ), 2) } + test("two collinear features result in error with no regularization") { + val singularInstances = sc.parallelize(Seq( + Instance(1.0, 1.0, Vectors.dense(1.0, 2.0)), + Instance(2.0, 1.0, Vectors.dense(2.0, 4.0)), + Instance(3.0, 1.0, Vectors.dense(3.0, 6.0)), + Instance(4.0, 1.0, Vectors.dense(4.0, 8.0)) + ), 2) + + intercept[IllegalArgumentException] { + new WeightedLeastSquares( + false, regParam = 0.0, standardizeFeatures = false, + standardizeLabel = false).fit(singularInstances) + } + + // Should not throw an exception + new WeightedLeastSquares( + false, regParam = 1.0, standardizeFeatures = false, + standardizeLabel = false).fit(singularInstances) + } + test("WLS against lm") { /* R code: |