diff options
author | Sun Rui <rui.sun@intel.com> | 2016-01-19 16:37:18 -0800 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2016-01-19 16:37:18 -0800 |
commit | 3ac648289c543b56937d67b5df5c3e228ef47cbd (patch) | |
tree | 74c38841840ce85970ce4883eec9a2c0cffbc546 /R/pkg/inst/tests/testthat/test_sparkSQL.R | |
parent | 37fefa66cbd61bc592aba42b0ed3aefc0cf3abb0 (diff) | |
download | spark-3ac648289c543b56937d67b5df5c3e228ef47cbd.tar.gz spark-3ac648289c543b56937d67b5df5c3e228ef47cbd.tar.bz2 spark-3ac648289c543b56937d67b5df5c3e228ef47cbd.zip |
[SPARK-12337][SPARKR] Implement dropDuplicates() method of DataFrame in SparkR.
Author: Sun Rui <rui.sun@intel.com>
Closes #10309 from sun-rui/SPARK-12337.
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_sparkSQL.R')
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 38 |
1 files changed, 37 insertions, 1 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 67ecdbc522..6610734cf4 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -734,7 +734,7 @@ test_that("head() and first() return the correct data", { expect_equal(ncol(testFirst), 2) }) -test_that("distinct() and unique on DataFrames", { +test_that("distinct(), unique() and dropDuplicates() on DataFrames", { lines <- c("{\"name\":\"Michael\"}", "{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"Justin\", \"age\":19}", @@ -750,6 +750,42 @@ test_that("distinct() and unique on DataFrames", { uniques2 <- unique(df) expect_is(uniques2, "DataFrame") expect_equal(count(uniques2), 3) + + # Test dropDuplicates() + df <- createDataFrame( + sqlContext, + list( + list(2, 1, 2), list(1, 1, 1), + list(1, 2, 1), list(2, 1, 2), + list(2, 2, 2), list(2, 2, 1), + list(2, 1, 1), list(1, 1, 2), + list(1, 2, 2), list(1, 2, 1)), + schema = c("key", "value1", "value2")) + result <- collect(dropDuplicates(df)) + expected <- rbind.data.frame( + c(1, 1, 1), c(1, 1, 2), c(1, 2, 1), + c(1, 2, 2), c(2, 1, 1), c(2, 1, 2), + c(2, 2, 1), c(2, 2, 2)) + names(expected) <- c("key", "value1", "value2") + expect_equivalent( + result[order(result$key, result$value1, result$value2),], + expected) + + result <- collect(dropDuplicates(df, c("key", "value1"))) + expected <- rbind.data.frame( + c(1, 1, 1), c(1, 2, 1), c(2, 1, 2), c(2, 2, 2)) + names(expected) <- c("key", "value1", "value2") + expect_equivalent( + result[order(result$key, result$value1, result$value2),], + expected) + + result <- collect(dropDuplicates(df, "key")) + expected <- rbind.data.frame( + c(1, 1, 1), c(2, 1, 2)) + names(expected) <- c("key", "value1", "value2") + expect_equivalent( + result[order(result$key, result$value1, result$value2),], + expected) }) test_that("sample on a DataFrame", { |