aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
Diffstat (limited to 'R')
-rw-r--r--R/pkg/R/DataFrame.R8
-rw-r--r--R/pkg/inst/tests/testthat/test_sparkSQL.R7
2 files changed, 14 insertions, 1 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index e33d0d8e29..97e0c9edea 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2642,6 +2642,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
#'
#' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
#' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL.
+#' Input SparkDataFrames can have different schemas (names and data types).
#'
#' Note: This does not remove duplicate rows across the two SparkDataFrames.
#'
@@ -2685,7 +2686,8 @@ setMethod("unionAll",
#' Union two or more SparkDataFrames
#'
-#' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL.
+#' Union two or more SparkDataFrames by row. As in R's \code{rbind}, this method
+#' requires that the input SparkDataFrames have the same column names.
#'
#' Note: This does not remove duplicate rows across the two SparkDataFrames.
#'
@@ -2709,6 +2711,10 @@ setMethod("unionAll",
setMethod("rbind",
signature(... = "SparkDataFrame"),
function(x, ..., deparse.level = 1) {
+ nm <- lapply(list(x, ...), names)
+ if (length(unique(nm)) != 1) {
+ stop("Names of input data frames are different.")
+ }
if (nargs() == 3) {
union(x, ...)
} else {
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 7c096597fe..620b633637 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1850,6 +1850,13 @@ test_that("union(), rbind(), except(), and intersect() on a DataFrame", {
expect_equal(count(unioned2), 12)
expect_equal(first(unioned2)$name, "Michael")
+ df3 <- df2
+ names(df3)[1] <- "newName"
+ expect_error(rbind(df, df3),
+ "Names of input data frames are different.")
+ expect_error(rbind(df, df2, df3),
+ "Names of input data frames are different.")
+
excepted <- arrange(except(df, df2), desc(df$age))
expect_is(unioned, "SparkDataFrame")
expect_equal(count(excepted), 2)