[SPARK-9803] [SPARKR] Add subset and transform + tests

Add subset and transform. Also reorganize `[` & `[[` to subset instead of select. Note: transform is very similar to mutate. Spark doesn't seem to replace an existing column with the same name in mutate (i.e. `mutate(df, age = df$age + 2)` returns a DataFrame with 2 columns with the same name 'age'), so transform does not do that for now either, though it is clearly stated that it should replace a column with a matching name (should I open a JIRA for mutate/transform?). Author: felixcheung <felixcheung_m@hotmail.com>. Closes #8503 from felixcheung/rsubset_transform.
author: felixcheung <felixcheung_m@hotmail.com> 2015-08-28 18:35:01 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-08-28 18:35:01 -0700
commit: 2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f (patch)
tree: 99948e6f7a674c5fcfa82526a50e11a5471f2910 /R/pkg/inst/tests/test_sparkSQL.R
parent: bb7f35239385ec74b5ee69631b5480fbcee253e4 (diff)
download: spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.tar.gz
spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.tar.bz2
spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.zip
1 files changed, 19 insertions, 1 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 933b11c8ee..0da5e38654 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -612,6 +612,10 @@ test_that("subsetting", {
   df5 <- df[df$age %in% c(19), c(1,2)]
   expect_equal(count(df5), 1)
   expect_equal(columns(df5), c("name", "age"))
+
+  df6 <- subset(df, df$age %in% c(30), c(1,2))
+  expect_equal(count(df6), 1)
+  expect_equal(columns(df6), c("name", "age"))
 })
 
 test_that("selectExpr() on a DataFrame", {
@@ -1028,7 +1032,7 @@ test_that("withColumn() and withColumnRenamed()", {
   expect_equal(columns(newDF2)[1], "newerAge")
 })
 
-test_that("mutate(), rename() and names()", {
+test_that("mutate(), transform(), rename() and names()", {
   df <- jsonFile(sqlContext, jsonPath)
   newDF <- mutate(df, newAge = df$age + 2)
   expect_equal(length(columns(newDF)), 3)
@@ -1042,6 +1046,20 @@ test_that("mutate(), rename() and names()", {
   names(newDF2) <- c("newerName", "evenNewerAge")
   expect_equal(length(names(newDF2)), 2)
   expect_equal(names(newDF2)[1], "newerName")
+
+  transformedDF <- transform(df, newAge = -df$age, newAge2 = df$age / 2)
+  expect_equal(length(columns(transformedDF)), 4)
+  expect_equal(columns(transformedDF)[3], "newAge")
+  expect_equal(columns(transformedDF)[4], "newAge2")
+  expect_equal(first(filter(transformedDF, transformedDF$name == "Andy"))$newAge, -30)
+
+  # test if transform on local data frames works
+  # ensure the proper signature is used - otherwise this will fail to run
+  attach(airquality)
+  result <- transform(Ozone, logOzone = log(Ozone))
+  expect_equal(nrow(result), 153)
+  expect_equal(ncol(result), 2)
+  detach(airquality)
 })
 
 test_that("write.df() on DataFrame and works with parquetFile", {
author	felixcheung <felixcheung_m@hotmail.com>	2015-08-28 18:35:01 -0700
committer	Shivaram Venkataraman <shivaram@cs.berkeley.edu>	2015-08-28 18:35:01 -0700
commit	2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f (patch)
tree	99948e6f7a674c5fcfa82526a50e11a5471f2910 /R/pkg/inst/tests/test_sparkSQL.R
parent	bb7f35239385ec74b5ee69631b5480fbcee253e4 (diff)
download	spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.tar.gz spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.tar.bz2 spark-2a4e00ca4d4e7a148b4ff8ce0ad1c6d517cee55f.zip