[SPARK-16961][CORE] Fixed off-by-one error that biased randomizeInPlace

JIRA issue link: https://issues.apache.org/jira/browse/SPARK-16961
Changed one line of Utils.randomizeInPlace to allow elements to stay in place. Created a unit test that runs Pearson's chi-squared test to determine whether the output diverges significantly from a uniform distribution.
Author: Nick Lavers <nick.lavers@videoamp.com>
Closes #14551 from nicklavers/SPARK-16961-randomizeInPlace.
author: Nick Lavers <nick.lavers@videoamp.com> 2016-08-19 10:11:59 +0100
committer: Sean Owen <sowen@cloudera.com> 2016-08-19 10:11:59 +0100
commit: 5377fc62360d5e9b5c94078e41d10a96e0e8a535 (patch)
tree: 1998db20af8d7cc93a2b00308c0f5e8e2b3166a9 /R
parent: 287bea13050b8eedc3b8b6b3491f1b5e5bc24d7a (diff)
download: spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.gz
spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.bz2
spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.zip
1 files changed, 6 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index 8c380fbf15..dfb7a185cd 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -546,15 +546,15 @@ test_that("spark.gaussianMixture", {
   df <- createDataFrame(data, c("x1", "x2"))
   model <- spark.gaussianMixture(df, ~ x1 + x2, k = 2)
   stats <- summary(model)
-  rLambda <- c(0.4, 0.6)
-  rMu <- c(-0.2614822, 0.5128697, 2.647284, 4.544682)
-  rSigma <- c(0.08427399, 0.00548772, 0.00548772, 0.09090715,
-              0.1641373, -0.1673806, -0.1673806, 0.7508951)
-  expect_equal(stats$lambda, rLambda)
+  rLambda <- c(0.50861, 0.49139)
+  rMu <- c(0.267, 1.195, 2.743, 4.730)
+  rSigma <- c(1.099, 1.339, 1.339, 1.798,
+              0.145, -0.309, -0.309, 0.716)
+  expect_equal(stats$lambda, rLambda, tolerance = 1e-3)
   expect_equal(unlist(stats$mu), rMu, tolerance = 1e-3)
   expect_equal(unlist(stats$sigma), rSigma, tolerance = 1e-3)
   p <- collect(select(predict(model, df), "prediction"))
-  expect_equal(p$prediction, c(0, 0, 0, 0, 1, 1, 1, 1, 1, 1))
+  expect_equal(p$prediction, c(0, 0, 0, 0, 0, 1, 1, 1, 1, 1))
 
   # Test model save/load
   modelPath <- tempfile(pattern = "spark-gaussianMixture", fileext = ".tmp")
author	Nick Lavers <nick.lavers@videoamp.com>	2016-08-19 10:11:59 +0100
committer	Sean Owen <sowen@cloudera.com>	2016-08-19 10:11:59 +0100
commit	5377fc62360d5e9b5c94078e41d10a96e0e8a535 (patch)
tree	1998db20af8d7cc93a2b00308c0f5e8e2b3166a9 /R
parent	287bea13050b8eedc3b8b6b3491f1b5e5bc24d7a (diff)
download	spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.gz spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.tar.bz2 spark-5377fc62360d5e9b5c94078e41d10a96e0e8a535.zip