about | summary | refs | log | tree | commit | diff
path: root/R/pkg
diff options
context:
space:
mode:
author: Sun Rui <rui.sun@intel.com> 2015-12-03 21:11:10 -0800
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-12-03 21:11:10 -0800
commit: 5011f264fb53705c528250bd055acbc2eca2baaa (patch)
tree: 4ffb766a9625d66ad2f3ff3895f7e3e9d5e2f3e8 /R/pkg
parent: b6e9963ee4bf0ffb62c8e9829a551bcdc31e12e3 (diff)
download: spark-5011f264fb53705c528250bd055acbc2eca2baaa.tar.gz
spark-5011f264fb53705c528250bd055acbc2eca2baaa.tar.bz2
spark-5011f264fb53705c528250bd055acbc2eca2baaa.zip
[SPARK-12104][SPARKR] collect() does not handle multiple columns with same name.
Author: Sun Rui <rui.sun@intel.com>
Closes #10118 from sun-rui/SPARK-12104.
Diffstat (limited to 'R/pkg')
-rw-r--r-- R/pkg/R/DataFrame.R 8
-rw-r--r-- R/pkg/inst/tests/test_sparkSQL.R 6
2 files changed, 10 insertions, 4 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index a82ded9c51..81b4e6b91d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -822,21 +822,21 @@ setMethod("collect",
# Get a column of complex type returns a list.
# Get a cell from a column of complex type returns a list instead of a vector.
col <- listCols[[colIndex]]
- colName <- dtypes[[colIndex]][[1]]
if (length(col) <= 0) {
- df[[colName]] <- col
+ df[[colIndex]] <- col
} else {
colType <- dtypes[[colIndex]][[2]]
# Note that "binary" columns behave like complex types.
if (!is.null(PRIMITIVE_TYPES[[colType]]) && colType != "binary") {
vec <- do.call(c, col)
stopifnot(class(vec) != "list")
- df[[colName]] <- vec
+ df[[colIndex]] <- vec
} else {
- df[[colName]] <- col
+ df[[colIndex]] <- col
}
}
}
+ names(df) <- names(x)
df
}
})
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 92ec82096c..1e7cb54099 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -530,6 +530,11 @@ test_that("collect() returns a data.frame", {
expect_equal(names(rdf)[1], "age")
expect_equal(nrow(rdf), 0)
expect_equal(ncol(rdf), 2)
+
+ # collect() correctly handles multiple columns with same name
+ df <- createDataFrame(sqlContext, list(list(1, 2)), schema = c("name", "name"))
+ ldf <- collect(df)
+ expect_equal(names(ldf), c("name", "name"))
})
test_that("limit() returns DataFrame with the correct number of rows", {
@@ -1197,6 +1202,7 @@ test_that("join() and merge() on a DataFrame", {
joined <- join(df, df2)
expect_equal(names(joined), c("age", "name", "name", "test"))
expect_equal(count(joined), 12)
+ expect_equal(names(collect(joined)), c("age", "name", "name", "test"))
joined2 <- join(df, df2, df$name == df2$name)
expect_equal(names(joined2), c("age", "name", "name", "test"))