about | summary | refs | log | tree | commit | diff
path: root/R/pkg/inst/tests/test_sparkSQL.R
diff options
context:
space:
mode:
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r-- R/pkg/inst/tests/test_sparkSQL.R 36
1 files changed, 30 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 7a42e289fc..dbb535e245 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -428,6 +428,10 @@ test_that("sampleDF on a DataFrame", {
expect_true(inherits(sampled, "DataFrame"))
sampled2 <- sampleDF(df, FALSE, 0.1)
expect_true(count(sampled2) < 3)
+
+ # Also test sample_frac
+ sampled3 <- sample_frac(df, FALSE, 0.1)
+ expect_true(count(sampled3) < 3)
})
test_that("select operators", {
@@ -533,6 +537,7 @@ test_that("column functions", {
c2 <- min(c) + max(c) + sum(c) + avg(c) + count(c) + abs(c) + sqrt(c)
c3 <- lower(c) + upper(c) + first(c) + last(c)
c4 <- approxCountDistinct(c) + countDistinct(c) + cast(c, "string")
+ c5 <- n(c) + n_distinct(c)
})
test_that("string operators", {
@@ -557,6 +562,13 @@ test_that("group by", {
expect_true(inherits(df2, "DataFrame"))
expect_true(3 == count(df2))
+ # Also test group_by, summarize, mean; summarize gd1 so group_by() is exercised
+ gd1 <- group_by(df, "name")
+ expect_true(inherits(gd1, "GroupedData"))
+ df_summarized <- summarize(gd1, mean_age = mean(df$age))
+ expect_true(inherits(df_summarized, "DataFrame"))
+ expect_true(3 == count(df_summarized))
+
df3 <- agg(gd, age = "sum")
expect_true(inherits(df3, "DataFrame"))
expect_true(3 == count(df3))
@@ -573,12 +585,12 @@ test_that("group by", {
expect_true(3 == count(max(gd, "age")))
})
-test_that("sortDF() and orderBy() on a DataFrame", {
+test_that("arrange() and orderBy() on a DataFrame", {
df <- jsonFile(sqlCtx, jsonPath)
- sorted <- sortDF(df, df$age)
+ sorted <- arrange(df, df$age)
expect_true(collect(sorted)[1,2] == "Michael")
- sorted2 <- sortDF(df, "name")
+ sorted2 <- arrange(df, "name")
expect_true(collect(sorted2)[2,"age"] == 19)
sorted3 <- orderBy(df, asc(df$age))
@@ -659,17 +671,17 @@ test_that("unionAll(), except(), and intersect() on a DataFrame", {
writeLines(lines, jsonPath2)
df2 <- loadDF(sqlCtx, jsonPath2, "json")
- unioned <- sortDF(unionAll(df, df2), df$age)
+ unioned <- arrange(unionAll(df, df2), df$age)
expect_true(inherits(unioned, "DataFrame"))
expect_true(count(unioned) == 6)
expect_true(first(unioned)$name == "Michael")
- excepted <- sortDF(except(df, df2), desc(df$age))
+ excepted <- arrange(except(df, df2), desc(df$age))
expect_true(inherits(unioned, "DataFrame"))
expect_true(count(excepted) == 2)
expect_true(first(excepted)$name == "Justin")
- intersected <- sortDF(intersect(df, df2), df$age)
+ intersected <- arrange(intersect(df, df2), df$age)
expect_true(inherits(unioned, "DataFrame"))
expect_true(count(intersected) == 1)
expect_true(first(intersected)$name == "Andy")
@@ -687,6 +699,18 @@ test_that("withColumn() and withColumnRenamed()", {
expect_true(columns(newDF2)[1] == "newerAge")
})
+test_that("mutate() and rename()", {
+ people <- jsonFile(sqlCtx, jsonPath)
+ withNewAge <- mutate(people, newAge = people$age + 2)
+ expect_true(length(columns(withNewAge)) == 3)  # one column appended
+ expect_true(columns(withNewAge)[3] == "newAge")
+ expect_true(first(filter(withNewAge, people$name != "Michael"))$newAge == 32)
+
+ renamedDF <- rename(people, newerAge = people$age)
+ expect_true(length(columns(renamedDF)) == 2)  # column count unchanged
+ expect_true(columns(renamedDF)[1] == "newerAge")
+})
+
test_that("saveDF() on DataFrame and works with parquetFile", {
df <- jsonFile(sqlCtx, jsonPath)
saveDF(df, parquetPath, "parquet", mode="overwrite")