about | summary | refs | log | tree | commit | diff
path: root/R
diff options
context:
space:
mode:
author: Sun Rui <rui.sun@intel.com> 2015-08-16 00:30:02 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-08-16 00:30:02 -0700
commit: 5f9ce738fe6bab3f0caffad0df1d3876178cf469 (patch)
tree: 4fa3cb7483492e30f7c1f2951b010f24b6da901c /R
parent: 182f9b7a6d3a3ee7ec7de6abc24e296aa794e4e8 (diff)
download: spark-5f9ce738fe6bab3f0caffad0df1d3876178cf469.tar.gz
spark-5f9ce738fe6bab3f0caffad0df1d3876178cf469.tar.bz2
spark-5f9ce738fe6bab3f0caffad0df1d3876178cf469.zip
[SPARK-8844] [SPARKR] head/collect is broken in SparkR.
This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame: in readCol(), `lapply(1:numRows, function(x) {` does not take into consideration the case where numRows = 0. In R, `1:0` evaluates to `c(1, 0)`, so the loop body still runs twice instead of not at all. Will add unit test case.
Author: Sun Rui <rui.sun@intel.com>
Closes #7419 from sun-rui/SPARK-8844.
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/R/deserialize.R  16
-rw-r--r--  R/pkg/inst/tests/test_sparkSQL.R  20
2 files changed, 30 insertions, 6 deletions
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 6d364f77be..33bf13ec9e 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
# Take a single column as Array[Byte] and deserialize it into an atomic vector
readCol <- function(inputCon, numRows) {
- # sapply can not work with POSIXlt
- do.call(c, lapply(1:numRows, function(x) {
- value <- readObject(inputCon)
- # Replace NULL with NA so we can coerce to vectors
- if (is.null(value)) NA else value
- }))
+ if (numRows > 0) {
+ # sapply can not work with POSIXlt
+ do.call(c, lapply(1:numRows, function(x) {
+ value <- readObject(inputCon)
+ # Replace NULL with NA so we can coerce to vectors
+ if (is.null(value)) NA else value
+ }))
+ } else {
+ vector()
+ }
}
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index e6d3b21ff8..c77f633135 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", {
expect_equal(names(rdf)[1], "age")
expect_equal(nrow(rdf), 3)
expect_equal(ncol(rdf), 2)
+
+ # collect() returns data correctly from a DataFrame with 0 row
+ df0 <- limit(df, 0)
+ rdf <- collect(df0)
+ expect_true(is.data.frame(rdf))
+ expect_equal(names(rdf)[1], "age")
+ expect_equal(nrow(rdf), 0)
+ expect_equal(ncol(rdf), 2)
})
test_that("limit() returns DataFrame with the correct number of rows", {
@@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", {
testFirst <- first(df)
expect_equal(nrow(testFirst), 1)
+
+ # head() and first() return the correct data on
+ # a DataFrame with 0 row
+ df0 <- limit(df, 0)
+
+ testHead <- head(df0)
+ expect_equal(nrow(testHead), 0)
+ expect_equal(ncol(testHead), 2)
+
+ testFirst <- first(df0)
+ expect_equal(nrow(testFirst), 0)
+ expect_equal(ncol(testFirst), 2)
})
test_that("distinct() and unique on DataFrames", {