diff options
author | jiangxingbo <jiangxb1987@gmail.com> | 2016-09-18 16:04:37 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-09-18 16:04:37 +0100 |
commit | 5d3f4615f8d0a19b97cde5ae603f74aef2cc2fd2 (patch) | |
tree | 72af3a5ed8691b346a07ece1acb49926b5d7ad98 | |
parent | 3fe630d314cf50d69868b7707ac8d8d2027080b8 (diff) | |
download | spark-5d3f4615f8d0a19b97cde5ae603f74aef2cc2fd2.tar.gz spark-5d3f4615f8d0a19b97cde5ae603f74aef2cc2fd2.tar.bz2 spark-5d3f4615f8d0a19b97cde5ae603f74aef2cc2fd2.zip |
[SPARK-17506][SQL] Improve the check double values equality rule.
## What changes were proposed in this pull request?
In `ExpressionEvalHelper`, we check the equality between two double values by comparing whether the expected value is within the range [target - tolerance, target + tolerance], but this can cause a negative false when the compared numerics are very large.
Beforeļ¼
```
val1 = 1.6358558070241E306
val2 = 1.6358558070240974E306
ExpressionEvalHelper.compareResults(val1, val2)
false
```
In fact, `val1` and `val2` are but with different precisions, we should tolerant this case by comparing with percentage range, eg.,expected is within range [target - target * tolerance_percentage, target + target * tolerance_percentage].
After:
```
val1 = 1.6358558070241E306
val2 = 1.6358558070240974E306
ExpressionEvalHelper.compareResults(val1, val2)
true
```
## How was this patch tested?
Exsiting testcases.
Author: jiangxingbo <jiangxb1987@gmail.com>
Closes #15059 from jiangxb1987/deq.
2 files changed, 30 insertions, 7 deletions
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 687387507e..5c9824289b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -170,11 +170,9 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Remainder(positiveLongLit, positiveLongLit), 0L) checkEvaluation(Remainder(negativeLongLit, negativeLongLit), 0L) - // TODO: the following lines would fail the test due to inconsistency result of interpret - // and codegen for remainder between giant values, seems like a numeric stability issue - // DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => - // checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe) - // } + DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe) + } } test("Abs") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 668543a28b..f0c149c02b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.scalacheck.Gen import org.scalactic.TripleEqualsSupport.Spread +import org.scalatest.exceptions.TestFailedException import org.scalatest.prop.GeneratorDrivenPropertyChecks import org.apache.spark.SparkFunSuite @@ -289,13 +290,37 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { (result, expected) match { case (result: Array[Byte], expected: Array[Byte]) => java.util.Arrays.equals(result, expected) - case (result: Double, expected: Spread[Double @unchecked]) => - expected.asInstanceOf[Spread[Double]].isWithin(result) case (result: Double, expected: Double) if result.isNaN && expected.isNaN => true + case (result: Double, expected: Double) => + relativeErrorComparison(result, expected) case (result: Float, expected: Float) if result.isNaN && expected.isNaN => true case _ => result == expected } } + + /** + * Private helper function for comparing two values using relative tolerance. + * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue, + * the relative tolerance is meaningless, so the exception will be raised to warn users. + * + * TODO: this duplicates functions in spark.ml.util.TestingUtils.relTol and + * spark.mllib.util.TestingUtils.relTol, they could be moved to common utils sub module for the + * whole spark project which does not depend on other modules. See more detail in discussion: + * https://github.com/apache/spark/pull/15059#issuecomment-246940444 + */ + private def relativeErrorComparison(x: Double, y: Double, eps: Double = 1E-8): Boolean = { + val absX = math.abs(x) + val absY = math.abs(y) + val diff = math.abs(x - y) + if (x == y) { + true + } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) { + throw new TestFailedException( + s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0) + } else { + diff < eps * math.min(absX, absY) + } + } } |