about | summary | refs | log | tree | commit | diff
path: root/R
diff options
context:
space:
mode:
author: Nick Lavers <nick.lavers@videoamp.com>, 2016-08-19 10:11:59 +0100
committer: Sean Owen <sowen@cloudera.com>, 2016-08-19 10:11:59 +0100
commit: 5377fc62360d5e9b5c94078e41d10a96e0e8a535 (patch)
tree: 1998db20af8d7cc93a2b00308c0f5e8e2b3166a9 /R
parent: 287bea13050b8eedc3b8b6b3491f1b5e5bc24d7a (diff)
download: spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.gz
spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.bz2
spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.zip
[SPARK-16961][CORE] Fixed off-by-one error that biased randomizeInPlace
JIRA issue link: https://issues.apache.org/jira/browse/SPARK-16961

Changed one line of Utils.randomizeInPlace to allow elements to stay in place. Created a unit test that runs Pearson's chi-squared test to determine whether the output diverges significantly from a uniform distribution.

Author: Nick Lavers <nick.lavers@videoamp.com>

Closes #14551 from nicklavers/SPARK-16961-randomizeInPlace.
Diffstat (limited to 'R')
-rw-r--r-- R/pkg/inst/tests/testthat/test_mllib.R | 12
1 files changed, 6 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 8c380fbf15..dfb7a185cd 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -546,15 +546,15 @@ test_that("spark.gaussianMixture", {
df <- createDataFrame(data, c("x1", "x2"))
model <- spark.gaussianMixture(df, ~ x1 + x2, k = 2)
stats <- summary(model)
- rLambda <- c(0.4, 0.6)
- rMu <- c(-0.2614822, 0.5128697, 2.647284, 4.544682)
- rSigma <- c(0.08427399, 0.00548772, 0.00548772, 0.09090715,
- 0.1641373, -0.1673806, -0.1673806, 0.7508951)
- expect_equal(stats$lambda, rLambda)
+ rLambda <- c(0.50861, 0.49139)
+ rMu <- c(0.267, 1.195, 2.743, 4.730)
+ rSigma <- c(1.099, 1.339, 1.339, 1.798,
+ 0.145, -0.309, -0.309, 0.716)
+ expect_equal(stats$lambda, rLambda, tolerance = 1e-3)
expect_equal(unlist(stats$mu), rMu, tolerance = 1e-3)
expect_equal(unlist(stats$sigma), rSigma, tolerance = 1e-3)
p <- collect(select(predict(model, df), "prediction"))
- expect_equal(p$prediction, c(0, 0, 0, 0, 1, 1, 1, 1, 1, 1))
+ expect_equal(p$prediction, c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1))
# Test model save/load
modelPath <- tempfile(pattern = "spark-gaussianMixture", fileext = ".tmp")