path: root/R/pkg/inst/tests/testthat/test_sparkSQL.R
author    Dongjoon Hyun <dongjoon@apache.org>    2016-06-17 16:07:33 -0700
committer Shivaram Venkataraman <shivaram@cs.berkeley.edu>    2016-06-17 16:07:33 -0700
commit    7d65a0db4a231882200513836f2720f59b35f364 (patch)
tree      d178328d8cdeeda2078bf371068985891567348d /R/pkg/inst/tests/testthat/test_sparkSQL.R
parent    ef3cc4fc096e831823d62af4fd2a12ae88d434b4 (diff)
[SPARK-16005][R] Add `randomSplit` to SparkR
## What changes were proposed in this pull request?

This PR adds `randomSplit` to SparkR for API parity.

## How was this patch tested?

Passes the Jenkins tests (with a new test case).

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #13721 from dongjoon-hyun/SPARK-16005.
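For reference, here is a minimal sketch of how the new API could be exercised from a SparkR session. The session setup (`sparkR.session()`) and the example weights and seed are assumptions for illustration and are not part of this diff; `randomSplit`, `createDataFrame`, and `count` are the same functions the test below uses.

```r
library(SparkR)

# Assumption: a local SparkR session is started for the example.
sparkR.session()

# A small DataFrame with ids 1..1000.
df <- createDataFrame(data.frame(id = 1:1000))

# Split into three DataFrames with approximate proportions 2:3:5.
# The optional third argument is a seed that makes the split reproducible.
splits <- randomSplit(df, c(2, 3, 5), 42)

# The row counts of the pieces sum to the original row count.
sapply(splits, count)
```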
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_sparkSQL.R')
-rw-r--r--  R/pkg/inst/tests/testthat/test_sparkSQL.R | 18
1 file changed, 18 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 7aa03a9048..607bd9c12f 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2280,6 +2280,24 @@ test_that("createDataFrame sqlContext parameter backward compatibility", {
expect_equal(collect(before), collect(after))
})
+test_that("randomSplit", {
+ num <- 4000
+ df <- createDataFrame(data.frame(id = 1:num))
+
+ weights <- c(2, 3, 5)
+ df_list <- randomSplit(df, weights)
+ expect_equal(length(weights), length(df_list))
+ counts <- sapply(df_list, count)
+ expect_equal(num, sum(counts))
+ expect_true(all(sapply(abs(counts / num - weights / sum(weights)), function(e) { e < 0.05 })))
+
+ df_list <- randomSplit(df, weights, 0)
+ expect_equal(length(weights), length(df_list))
+ counts <- sapply(df_list, count)
+ expect_equal(num, sum(counts))
+ expect_true(all(sapply(abs(counts / num - weights / sum(weights)), function(e) { e < 0.05 })))
+})
+
unlink(parquetPath)
unlink(jsonPath)
unlink(jsonPathNa)