author    hyukjinkwon <gurwls223@gmail.com>  2016-10-07 11:34:49 -0700
committer Felix Cheung <felixcheung@apache.org>  2016-10-07 11:34:49 -0700
commit    9d8ae853ecc5600f5c2f69565b96d5c46a8c0048 (patch)
tree      c3f57c9401be83b54eaa96f3fa4018fa527da3d5 /R/pkg/inst/tests/testthat/test_sparkSQL.R
parent    bb1aaf28eca6d9ae9af664ac3ad35cafdfc01a3b (diff)
[SPARK-17665][SPARKR] Support options/mode all for read/write APIs and options in other types
## What changes were proposed in this pull request?

This PR includes the changes below:

- Support `mode`/`options` in the `read.parquet`, `write.parquet`, `read.orc`, `write.orc`, `read.text`, `write.text`, `read.json` and `write.json` APIs
- Support other types (logical, numeric and string) as options for `write.df`, `read.df`, `read.parquet`, `write.parquet`, `read.orc`, `write.orc`, `read.text`, `write.text`, `read.json` and `write.json`

## How was this patch tested?

Unit tests in `test_sparkSQL.R` / `utils.R`.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #15239 from HyukjinKwon/SPARK-17665.
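For readers skimming the diff, a minimal sketch of the API surface this change enables, mirroring the tests added below. The file paths are hypothetical placeholders, not part of the commit:

```r
library(SparkR)
sparkR.session()

# Logical and numeric values now pass through as options
# (previously options had to be strings):
df <- read.df("/tmp/cars.csv", "csv", header = TRUE, inferSchema = TRUE)

# Type-specific writers now accept options such as compression ...
write.json(df, "/tmp/cars_json", compression = "gzip")

# ... as well as a save mode:
write.parquet(df, "/tmp/cars_parquet", mode = "overwrite")
```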
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_sparkSQL.R')
-rw-r--r-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 75
1 file changed, 75 insertions(+), 0 deletions(-)
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index f5ab601f27..6d8cfad5c1 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -256,6 +256,23 @@ test_that("read/write csv as DataFrame", {
unlink(csvPath2)
})
+test_that("Support other types for options", {
+ csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv")
+ mockLinesCsv <- c("year,make,model,comment,blank",
+ "\"2012\",\"Tesla\",\"S\",\"No comment\",",
+ "1997,Ford,E350,\"Go get one now they are going fast\",",
+ "2015,Chevy,Volt",
+ "NA,Dummy,Placeholder")
+ writeLines(mockLinesCsv, csvPath)
+
+ csvDf <- read.df(csvPath, "csv", header = "true", inferSchema = "true")
+ expected <- read.df(csvPath, "csv", header = TRUE, inferSchema = TRUE)
+ expect_equal(collect(csvDf), collect(expected))
+
+ expect_error(read.df(csvPath, "csv", header = TRUE, maxColumns = 3))
+ unlink(csvPath)
+})
+
test_that("convert NAs to null type in DataFrames", {
rdd <- parallelize(sc, list(list(1L, 2L), list(NA, 4L)))
df <- createDataFrame(rdd, list("a", "b"))
@@ -497,6 +514,19 @@ test_that("read/write json files", {
unlink(jsonPath3)
})
+test_that("read/write json files - compression option", {
+ df <- read.df(jsonPath, "json")
+
+ jsonPath <- tempfile(pattern = "jsonPath", fileext = ".json")
+ write.json(df, jsonPath, compression = "gzip")
+ jsonDF <- read.json(jsonPath)
+ expect_is(jsonDF, "SparkDataFrame")
+ expect_equal(count(jsonDF), count(df))
+ expect_true(length(list.files(jsonPath, pattern = ".gz")) > 0)
+
+ unlink(jsonPath)
+})
+
test_that("jsonRDD() on a RDD with json string", {
sqlContext <- suppressWarnings(sparkRSQL.init(sc))
rdd <- parallelize(sc, mockLines)
@@ -1786,6 +1816,21 @@ test_that("read/write ORC files", {
unsetHiveContext()
})
+test_that("read/write ORC files - compression option", {
+ setHiveContext(sc)
+ df <- read.df(jsonPath, "json")
+
+ orcPath2 <- tempfile(pattern = "orcPath2", fileext = ".orc")
+ write.orc(df, orcPath2, compression = "ZLIB")
+ orcDF <- read.orc(orcPath2)
+ expect_is(orcDF, "SparkDataFrame")
+ expect_equal(count(orcDF), count(df))
+ expect_true(length(list.files(orcPath2, pattern = ".zlib.orc")) > 0)
+
+ unlink(orcPath2)
+ unsetHiveContext()
+})
+
test_that("read/write Parquet files", {
df <- read.df(jsonPath, "json")
# Test write.df and read.df
@@ -1817,6 +1862,23 @@ test_that("read/write Parquet files", {
unlink(parquetPath4)
})
+test_that("read/write Parquet files - compression option/mode", {
+ df <- read.df(jsonPath, "json")
+ tempPath <- tempfile(pattern = "tempPath", fileext = ".parquet")
+
+ # Test write.df and read.df
+ write.parquet(df, tempPath, compression = "GZIP")
+ df2 <- read.parquet(tempPath)
+ expect_is(df2, "SparkDataFrame")
+ expect_equal(count(df2), 3)
+ expect_true(length(list.files(tempPath, pattern = ".gz.parquet")) > 0)
+
+ write.parquet(df, tempPath, mode = "overwrite")
+ df3 <- read.parquet(tempPath)
+ expect_is(df3, "SparkDataFrame")
+ expect_equal(count(df3), 3)
+})
+
test_that("read/write text files", {
# Test write.df and read.df
df <- read.df(jsonPath, "text")
@@ -1838,6 +1900,19 @@ test_that("read/write text files", {
unlink(textPath2)
})
+test_that("read/write text files - compression option", {
+ df <- read.df(jsonPath, "text")
+
+ textPath <- tempfile(pattern = "textPath", fileext = ".txt")
+ write.text(df, textPath, compression = "GZIP")
+ textDF <- read.text(textPath)
+ expect_is(textDF, "SparkDataFrame")
+ expect_equal(count(textDF), count(df))
+ expect_true(length(list.files(textPath, pattern = ".gz")) > 0)
+
+ unlink(textPath)
+})
+
test_that("describe() and summarize() on a DataFrame", {
df <- read.json(jsonPath)
stats <- describe(df, "age")