diff options
author | Nick Lavers <nick.lavers@videoamp.com> | 2016-08-19 10:11:59 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-08-19 10:11:59 +0100 |
commit | 5377fc62360d5e9b5c94078e41d10a96e0e8a535 (patch) | |
tree | 1998db20af8d7cc93a2b00308c0f5e8e2b3166a9 /R | |
parent | 287bea13050b8eedc3b8b6b3491f1b5e5bc24d7a (diff) | |
download | spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.gz spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.bz2 spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.zip |
[SPARK-16961][CORE] Fixed off-by-one error that biased randomizeInPlace
JIRA issue link:
https://issues.apache.org/jira/browse/SPARK-16961
Changed one line of Utils.randomizeInPlace to allow elements to stay in place.
Created a unit test that runs Pearson's chi-squared test to determine whether the output diverges significantly from a uniform distribution.
Author: Nick Lavers <nick.lavers@videoamp.com>
Closes #14551 from nicklavers/SPARK-16961-randomizeInPlace.
Diffstat (limited to 'R')
-rw-r--r-- | R/pkg/inst/tests/testthat/test_mllib.R | 12 |
1 files changed, 6 insertions, 6 deletions
```diff
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 8c380fbf15..dfb7a185cd 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -546,15 +546,15 @@ test_that("spark.gaussianMixture", {
   df <- createDataFrame(data, c("x1", "x2"))
   model <- spark.gaussianMixture(df, ~ x1 + x2, k = 2)
   stats <- summary(model)
-  rLambda <- c(0.4, 0.6)
-  rMu <- c(-0.2614822, 0.5128697, 2.647284, 4.544682)
-  rSigma <- c(0.08427399, 0.00548772, 0.00548772, 0.09090715,
-              0.1641373, -0.1673806, -0.1673806, 0.7508951)
-  expect_equal(stats$lambda, rLambda)
+  rLambda <- c(0.50861, 0.49139)
+  rMu <- c(0.267, 1.195, 2.743, 4.730)
+  rSigma <- c(1.099, 1.339, 1.339, 1.798,
+              0.145, -0.309, -0.309, 0.716)
+  expect_equal(stats$lambda, rLambda, tolerance = 1e-3)
   expect_equal(unlist(stats$mu), rMu, tolerance = 1e-3)
   expect_equal(unlist(stats$sigma), rSigma, tolerance = 1e-3)
   p <- collect(select(predict(model, df), "prediction"))
-  expect_equal(p$prediction, c(0, 0, 0, 0, 1, 1, 1, 1, 1, 1))
+  expect_equal(p$prediction, c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-gaussianMixture", fileext = ".tmp")
```