diff options
author | felixcheung <felixcheung_m@hotmail.com> | 2015-08-28 18:35:01 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-08-28 18:35:01 -0700 |
commit | 2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f (patch) | |
tree | 99948e6f7a674c5fcfa82526a50e11a5471f2910 /R/pkg/inst/tests/test_sparkSQL.R | |
parent | bb7f35239385ec74b5ee69631b5480fbcee253e4 (diff) | |
download | spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.tar.gz spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.tar.bz2 spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.zip |
[SPARK-9803] [SPARKR] Add subset and transform + tests
Add subset and transform
Also reorganize `[` & `[[` to subset instead of select
Note: transform is very similar to mutate. Spark doesn't seem to replace an existing column with the same name in mutate (i.e. `mutate(df, age = df$age + 2)` returns a DataFrame with 2 columns with the same name 'age'), so transform does not do that for now either.
Though it is clearly stated that it should replace a column with a matching name (should I open a JIRA for mutate/transform?)
Author: felixcheung <felixcheung_m@hotmail.com>
Closes #8503 from felixcheung/rsubset_transform.
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r-- | R/pkg/inst/tests/test_sparkSQL.R | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 933b11c8ee..0da5e38654 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -612,6 +612,10 @@ test_that("subsetting", { df5 <- df[df$age %in% c(19), c(1,2)] expect_equal(count(df5), 1) expect_equal(columns(df5), c("name", "age")) + + df6 <- subset(df, df$age %in% c(30), c(1,2)) + expect_equal(count(df6), 1) + expect_equal(columns(df6), c("name", "age")) }) test_that("selectExpr() on a DataFrame", { @@ -1028,7 +1032,7 @@ test_that("withColumn() and withColumnRenamed()", { expect_equal(columns(newDF2)[1], "newerAge") }) -test_that("mutate(), rename() and names()", { +test_that("mutate(), transform(), rename() and names()", { df <- jsonFile(sqlContext, jsonPath) newDF <- mutate(df, newAge = df$age + 2) expect_equal(length(columns(newDF)), 3) @@ -1042,6 +1046,20 @@ test_that("mutate(), rename() and names()", { names(newDF2) <- c("newerName", "evenNewerAge") expect_equal(length(names(newDF2)), 2) expect_equal(names(newDF2)[1], "newerName") + + transformedDF <- transform(df, newAge = -df$age, newAge2 = df$age / 2) + expect_equal(length(columns(transformedDF)), 4) + expect_equal(columns(transformedDF)[3], "newAge") + expect_equal(columns(transformedDF)[4], "newAge2") + expect_equal(first(filter(transformedDF, transformedDF$name == "Andy"))$newAge, -30) + + # test if transform on local data frames works + # ensure the proper signature is used - otherwise this will fail to run + attach(airquality) + result <- transform(Ozone, logOzone = log(Ozone)) + expect_equal(nrow(result), 153) + expect_equal(ncol(result), 2) + detach(airquality) }) test_that("write.df() on DataFrame and works with parquetFile", { |