about | summary | refs | log | tree | commit | diff
path: root/R/pkg
diff options
context:
space:
mode:
author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-08-28 00:37:50 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-08-28 00:37:50 -0700
commit: 2f99c37273c1d82e2ba39476e4429ea4aaba7ec6 (patch)
tree: 176086f0e2261f0e6dba9cf22997f451def33f8f /R/pkg
parent: 7583681e6b0824d7eed471dc4d8fa0b2addf9ffc (diff)
download: spark-2f99c37273c1d82e2ba39476e4429ea4aaba7ec6.tar.gz
          spark-2f99c37273c1d82e2ba39476e4429ea4aaba7ec6.tar.bz2
          spark-2f99c37273c1d82e2ba39476e4429ea4aaba7ec6.zip
[SPARK-10328] [SPARKR] Fix generic for na.omit
S3 function is at https://stat.ethz.ch/R-manual/R-patched/library/stats/html/na.fail.html

Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Author: Shivaram Venkataraman <shivaram.venkataraman@gmail.com>
Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #8495 from shivaram/na-omit-fix.
Diffstat (limited to 'R/pkg')
-rw-r--r-- R/pkg/R/DataFrame.R | 6
-rw-r--r-- R/pkg/R/generics.R | 2
-rw-r--r-- R/pkg/inst/tests/test_sparkSQL.R | 23
3 files changed, 26 insertions, 5 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index dd8126aebf..74de7c81e3 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1699,9 +1699,9 @@ setMethod("dropna",
#' @name na.omit
#' @export
setMethod("na.omit",
- signature(x = "DataFrame"),
- function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
- dropna(x, how, minNonNulls, cols)
+ signature(object = "DataFrame"),
+ function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+ dropna(object, how, minNonNulls, cols)
})
#' fillna
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index a829d46c18..b578b8789d 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -413,7 +413,7 @@ setGeneric("dropna",
#' @rdname nafunctions
#' @export
setGeneric("na.omit",
- function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+ function(object, ...) {
standardGeneric("na.omit")
})
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 4b672e115f..933b11c8ee 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1083,7 +1083,7 @@ test_that("describe() and summarize() on a DataFrame", {
expect_equal(collect(stats2)[5, "age"], "30")
})
-test_that("dropna() on a DataFrame", {
+test_that("dropna() and na.omit() on a DataFrame", {
df <- jsonFile(sqlContext, jsonPathNa)
rows <- collect(df)
@@ -1092,6 +1092,8 @@ test_that("dropna() on a DataFrame", {
expected <- rows[!is.na(rows$name),]
actual <- collect(dropna(df, cols = "name"))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, cols = "name"))
+ expect_identical(expected, actual)
expected <- rows[!is.na(rows$age),]
actual <- collect(dropna(df, cols = "age"))
@@ -1101,48 +1103,67 @@ test_that("dropna() on a DataFrame", {
expect_identical(expected$age, actual$age)
expect_identical(expected$height, actual$height)
expect_identical(expected$name, actual$name)
+ actual <- collect(na.omit(df, cols = "age"))
expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
actual <- collect(dropna(df, cols = c("age", "height")))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, cols = c("age", "height")))
+ expect_identical(expected, actual)
expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df))
+ expect_identical(expected, actual)
# drop with how
expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df))
+ expect_identical(expected, actual)
expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),]
actual <- collect(dropna(df, "all"))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, "all"))
+ expect_identical(expected, actual)
expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df, "any"))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, "any"))
+ expect_identical(expected, actual)
expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
actual <- collect(dropna(df, "any", cols = c("age", "height")))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, "any", cols = c("age", "height")))
+ expect_identical(expected, actual)
expected <- rows[!is.na(rows$age) | !is.na(rows$height),]
actual <- collect(dropna(df, "all", cols = c("age", "height")))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, "all", cols = c("age", "height")))
+ expect_identical(expected, actual)
# drop with threshold
expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) >= 2,]
actual <- collect(dropna(df, minNonNulls = 2, cols = c("age", "height")))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, minNonNulls = 2, cols = c("age", "height")))
+ expect_identical(expected, actual)
expected <- rows[as.integer(!is.na(rows$age)) +
as.integer(!is.na(rows$height)) +
as.integer(!is.na(rows$name)) >= 3,]
actual <- collect(dropna(df, minNonNulls = 3, cols = c("name", "age", "height")))
expect_identical(expected, actual)
+ actual <- collect(na.omit(df, minNonNulls = 3, cols = c("name", "age", "height")))
+ expect_identical(expected, actual)
})
test_that("fillna() on a DataFrame", {