aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorFelix Cheung <felixcheung_m@hotmail.com>2016-06-28 17:08:28 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2016-06-28 17:08:28 -0700
commit823518c2b5259c8a954431467639198c808c9198 (patch)
treeabe9c164ddc711c80524caa388084560e9db9701 /R
parent5545b791096756b07b3207fb3de13b68b9a37b00 (diff)
downloadspark-823518c2b5259c8a954431467639198c808c9198.tar.gz
spark-823518c2b5259c8a954431467639198c808c9198.tar.bz2
spark-823518c2b5259c8a954431467639198c808c9198.zip
[SPARKR] add csv tests
## What changes were proposed in this pull request? Add unit tests for CSV data in SparkR. ## How was this patch tested? Unit tests. Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #13904 from felixcheung/rcsv.
Diffstat (limited to 'R')
-rw-r--r-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 18
1 files changed, 18 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 7562fa95e3..d4662ad4e3 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -208,6 +208,24 @@ test_that("create DataFrame from RDD", {
unsetHiveContext()
})
+# Writes a small CSV fixture to a temp file, loads it through the "csv" data
+# source with the first line used as the header, and checks the row count,
+# the column names, and the contents of the 2015 row.
+test_that("read csv as DataFrame", {
+  # Fixture: one header line, two rows ending in an empty trailing field, and
+  # a final short row that supplies only the first three fields.
+  csvLines <- c("year,make,model,comment,blank",
+                "\"2012\",\"Tesla\",\"S\",\"No comment\",",
+                "1997,Ford,E350,\"Go get one now they are going fast\",",
+                "2015,Chevy,Volt")
+  csvFile <- tempfile(pattern = "sparkr-test", fileext = ".csv")
+  writeLines(csvLines, csvFile)
+
+  # default "header" is false
+  df <- read.df(csvFile, "csv", header = "true")
+
+  # Three data rows; the column names come from the first line of the file.
+  expect_equal(count(df), 3)
+  expect_equal(columns(df), c("year", "make", "model", "comment", "blank"))
+
+  # Only the three fields present in the short row are listed as expected.
+  # NOTE(review): presumably the missing "comment"/"blank" values come back as
+  # NA and are dropped by sort() before the comparison -- confirm.
+  chevyRow <- collect(where(df, df$year == "2015"))
+  expectedChevy <- list(year = "2015", make = "Chevy", model = "Volt")
+  expect_equal(sort(unlist(chevyRow)), sort(unlist(expectedChevy)))
+
+  # Remove the temporary fixture file.
+  unlink(csvFile)
+})
+
test_that("convert NAs to null type in DataFrames", {
rdd <- parallelize(sc, list(list(1L, 2L), list(NA, 4L)))
df <- createDataFrame(rdd, list("a", "b"))