blob: 6263ee3518d8c21beb081d4c26dd0aa837f683d5 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
package spark.partial
import cern.jet.stat.Probability
/**
 * Memoizes Student's T inverse values for one fixed confidence level across many sample sizes.
 * Per the original author's note this backs the MeanEvaluator, which needs confidence
 * intervals for many keys and would otherwise recompute the same value repeatedly.
 *
 * @param confidence the confidence level used for every lookup made through this instance
 */
class StudentTCacher(confidence: Double) {

  /** Sample sizes at or above this threshold are answered with the Gaussian approximation. */
  val NORMAL_APPROX_SAMPLE_SIZE: Int = 100

  /** Result of `Probability.normalInverse` at `1 - (1 - confidence) / 2`; used for large samples. */
  val normalApprox: Double = Probability.normalInverse(1 - (1 - confidence) / 2)

  /** Memo table indexed by sample size; a negative entry marks a slot that is not filled yet. */
  val cache: Array[Double] = Array.fill[Double](NORMAL_APPROX_SAMPLE_SIZE)(-1.0)

  /**
   * Looks up the multiplier for the given sample size, computing and storing it on first use.
   *
   * NOTE(review): a sample size below 2 passes a non-positive degrees-of-freedom argument to
   * `studentTInverse`, and a negative size indexes the cache out of range. Callers presumably
   * guarantee `sampleSize >= 2` -- confirm.
   *
   * @param sampleSize number of samples the interval is based on
   * @return `normalApprox` when `sampleSize >= NORMAL_APPROX_SAMPLE_SIZE`, otherwise the
   *         (cached) `studentTInverse` value for `sampleSize - 1` degrees of freedom
   */
  def get(sampleSize: Long): Double =
    if (sampleSize < NORMAL_APPROX_SAMPLE_SIZE) {
      val idx = sampleSize.toInt
      val cached = cache(idx)
      if (cached < 0) {
        // First request for this size: compute once and remember the result.
        val computed = Probability.studentTInverse(1 - confidence, idx - 1)
        cache(idx) = computed
        computed
      } else {
        cached
      }
    } else {
      normalApprox
    }
}
|