about summary refs log tree commit diff
path: root/R/pkg/inst/tests/testthat/test_sparkSQL.R
diff options
context:
space:
mode:
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_sparkSQL.R')
-rw-r--r-- R/pkg/inst/tests/testthat/test_sparkSQL.R 32
1 files changed, 26 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index a3aa26d9e7..a0ab719202 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -213,15 +213,35 @@ test_that("read csv as DataFrame", {
mockLinesCsv <- c("year,make,model,comment,blank",
"\"2012\",\"Tesla\",\"S\",\"No comment\",",
"1997,Ford,E350,\"Go get one now they are going fast\",",
- "2015,Chevy,Volt")
+ "2015,Chevy,Volt",
+ "NA,Dummy,Placeholder")
writeLines(mockLinesCsv, csvPath)
- # default "header" is false
- df <- read.df(csvPath, "csv", header = "true")
- expect_equal(count(df), 3)
+ # default "header" is false, inferSchema to handle "year" as "int"
+ df <- read.df(csvPath, "csv", header = "true", inferSchema = "true")
+ expect_equal(count(df), 4)
expect_equal(columns(df), c("year", "make", "model", "comment", "blank"))
- expect_equal(sort(unlist(collect(where(df, df$year == "2015")))),
- sort(unlist(list(year = "2015", make = "Chevy", model = "Volt"))))
+ expect_equal(sort(unlist(collect(where(df, df$year == 2015)))),
+ sort(unlist(list(year = 2015, make = "Chevy", model = "Volt"))))
+
+ # since "year" is "int", let's skip the NA values
+ withoutna <- na.omit(df, how = "any", cols = "year")
+ expect_equal(count(withoutna), 3)
+
+ unlink(csvPath)
+ csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv")
+ mockLinesCsv <- c("year,make,model,comment,blank",
+ "\"2012\",\"Tesla\",\"S\",\"No comment\",",
+ "1997,Ford,E350,\"Go get one now they are going fast\",",
+ "2015,Chevy,Volt",
+ "Empty,Dummy,Placeholder")
+ writeLines(mockLinesCsv, csvPath)
+
+ df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.string = "Empty")
+ expect_equal(count(df2), 4)
+ withoutna2 <- na.omit(df2, how = "any", cols = "year")
+ expect_equal(count(withoutna2), 3)
+ expect_equal(count(where(withoutna2, withoutna2$make == "Dummy")), 0)
unlink(csvPath)
})