diff options
-rw-r--r-- | R/pkg/NAMESPACE | 1
-rw-r--r-- | R/pkg/R/DataFrame.R | 43
-rw-r--r-- | R/pkg/R/generics.R | 6
-rw-r--r-- | R/pkg/inst/tests/testthat/test_context.R | 2
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 8
5 files changed, 47 insertions, 13 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ea42888eae..2272d8bdd5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -107,6 +107,7 @@ exportMethods("arrange", "summary", "take", "transform", + "union", "unionAll", "unique", "unpersist", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index ed0bb85f43..725cbf24f2 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2251,7 +2251,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { cols } -#' rbind +#' Return a new SparkDataFrame containing the union of rows #' #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame #' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL. @@ -2261,39 +2261,64 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #' @param y A SparkDataFrame #' @return A SparkDataFrame containing the result of the union. #' @family SparkDataFrame functions -#' @rdname rbind -#' @name unionAll +#' @rdname union +#' @name union +#' @seealso \link{rbind} #' @export #' @examples #'\dontrun{ #' sparkR.session() #' df1 <- read.json(path) #' df2 <- read.json(path2) -#' unioned <- unionAll(df, df2) +#' unioned <- union(df, df2) +#' unions <- rbind(df, df2, df3, df4) #' } +#' @note union since 2.0.0 +setMethod("union", + signature(x = "SparkDataFrame", y = "SparkDataFrame"), + function(x, y) { + unioned <- callJMethod(x@sdf, "union", y@sdf) + dataFrame(unioned) + }) + +#' unionAll is deprecated - use union instead +#' @rdname union +#' @name unionAll +#' @export #' @note unionAll since 1.4.0 setMethod("unionAll", signature(x = "SparkDataFrame", y = "SparkDataFrame"), function(x, y) { - unioned <- callJMethod(x@sdf, "unionAll", y@sdf) - dataFrame(unioned) + .Deprecated("union") + union(x, y) }) #' Union two or more SparkDataFrames #' -#' Returns a new SparkDataFrame containing rows of all parameters. +#' Union two or more SparkDataFrames. 
This is equivalent to `UNION ALL` in SQL. +#' Note that this does not remove duplicate rows across the two SparkDataFrames. #' +#' @param x A SparkDataFrame +#' @param ... Additional SparkDataFrame +#' @return A SparkDataFrame containing the result of the union. +#' @family SparkDataFrame functions #' @rdname rbind #' @name rbind +#' @seealso \link{union} #' @export +#' @examples +#'\dontrun{ +#' sparkR.session() +#' unions <- rbind(df, df2, df3, df4) +#' } #' @note rbind since 1.5.0 setMethod("rbind", signature(... = "SparkDataFrame"), function(x, ..., deparse.level = 1) { if (nargs() == 3) { - unionAll(x, ...) + union(x, ...) } else { - unionAll(x, Recall(..., deparse.level = 1)) + union(x, Recall(..., deparse.level = 1)) } }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 7b08a8ee66..27dfd67ffc 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -662,7 +662,11 @@ setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) setGeneric("toRDD", function(x) { standardGeneric("toRDD") }) -#' @rdname rbind +#' @rdname union +#' @export +setGeneric("union", function(x, y) { standardGeneric("union") }) + +#' @rdname union #' @export setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") }) diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index b149818ff4..3d232df566 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -24,7 +24,7 @@ test_that("Check masked functions", { namesOfMaskedCompletely <- c("cov", "filter", "sample") namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var", "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", - "summary", "transform", "drop", "window", "as.data.frame") + "summary", "transform", "drop", "window", "as.data.frame", "union") if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { namesOfMasked <- c("endsWith", "startsWith", 
namesOfMasked) } diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 7c192fb5a0..9378c7afac 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1590,7 +1590,7 @@ test_that("isLocal()", { expect_false(isLocal(df)) }) -test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", { +test_that("union(), rbind(), except(), and intersect() on a DataFrame", { df <- read.json(jsonPath) lines <- c("{\"name\":\"Bob\", \"age\":24}", @@ -1600,10 +1600,11 @@ test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", { writeLines(lines, jsonPath2) df2 <- read.df(jsonPath2, "json") - unioned <- arrange(unionAll(df, df2), df$age) + unioned <- arrange(union(df, df2), df$age) expect_is(unioned, "SparkDataFrame") expect_equal(count(unioned), 6) expect_equal(first(unioned)$name, "Michael") + expect_equal(count(arrange(suppressWarnings(unionAll(df, df2)), df$age)), 6) unioned2 <- arrange(rbind(unioned, df, df2), df$age) expect_is(unioned2, "SparkDataFrame") @@ -1620,6 +1621,9 @@ test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", { expect_equal(count(intersected), 1) expect_equal(first(intersected)$name, "Andy") + # Test base::union is working + expect_equal(union(c(1:3), c(3:5)), c(1:5)) + # Test base::rbind is working expect_equal(length(rbind(1:4, c = 2, a = 10, 10, deparse.level = 0)), 16) |