aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst/tests/testthat/test_sparkSQL.R
diff options
context:
space:
mode:
authorForest Fang <forest.fang@outlook.com>2015-12-29 12:45:24 +0530
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-12-29 12:45:24 +0530
commitd80cc90b5545cff82cd9b340f12d01eafc9ca524 (patch)
tree2f4894084bc072a93323a6f57ececd8311d68c8a /R/pkg/inst/tests/testthat/test_sparkSQL.R
parent73862a1eb9744c3c32458c9c6f6431c23783786a (diff)
downloadspark-d80cc90b5545cff82cd9b340f12d01eafc9ca524.tar.gz
spark-d80cc90b5545cff82cd9b340f12d01eafc9ca524.tar.bz2
spark-d80cc90b5545cff82cd9b340f12d01eafc9ca524.zip
[SPARK-12526][SPARKR] `ifelse`, `when`, `otherwise` unable to take Column as value
`ifelse`, `when`, `otherwise` are unable to take `Column` typed S4 objects as values. For example:

```r
ifelse(lit(1) == lit(1), lit(2), lit(3))
ifelse(df$mpg > 0, df$mpg, 0)
```

will both fail with

```r
attempt to replicate an object of type 'environment'
```

The PR replaces `ifelse` calls with `if ... else ...` inside the function implementations to avoid attempting to vectorize (i.e. calling `rep()`). It remains to be discussed whether we should instead support vectorization in these functions for consistency, because `ifelse` in base R is vectorized, but I cannot foresee any scenario in which these functions would need to be vectorized in SparkR.

For reference, the added test cases trigger the following failure without this change:

```r
. Error: when(), otherwise() and ifelse() with column on a DataFrame ----------
error in evaluating the argument 'x' in selecting a method for function 'collect': error in evaluating the argument 'col' in selecting a method for function 'select': attempt to replicate an object of type 'environment'
Calls: when -> when -> ifelse -> ifelse
1: withCallingHandlers(eval(code, new_test_environment), error = capture_calls, message = function(c) invokeRestart("muffleMessage"))
2: eval(code, new_test_environment)
3: eval(expr, envir, enclos)
4: expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, lit(1))))[, 1], c(NA, 1)) at test_sparkSQL.R:1126
5: expect_that(object, equals(expected, label = expected.label, ...), info = info, label = label)
6: condition(object)
7: compare(actual, expected, ...)
8: collect(select(df, when(df$a > 1 & df$b > 2, lit(1))))
Error: Test failures
Execution halted
```

Author: Forest Fang <forest.fang@outlook.com>

Closes #10481 from saurfang/spark-12526.
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_sparkSQL.R')
-rw-r--r--R/pkg/inst/tests/testthat/test_sparkSQL.R8
1 files changed, 8 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 135c7576e5..c2b6adbe3a 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1120,6 +1120,14 @@ test_that("when(), otherwise() and ifelse() on a DataFrame", {
expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))[, 1], c(1, 0))
})
+test_that("when(), otherwise() and ifelse() with column on a DataFrame", {
+ l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
+ df <- createDataFrame(sqlContext, l)
+ expect_equal(collect(select(df, when(df$a > 1 & df$b > 2, lit(1))))[, 1], c(NA, 1))
+ expect_equal(collect(select(df, otherwise(when(df$a > 1, lit(1)), lit(0))))[, 1], c(0, 1))
+ expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, lit(0), lit(1))))[, 1], c(1, 0))
+})
+
test_that("group by, agg functions", {
df <- read.json(sqlContext, jsonPath)
df1 <- agg(df, name = "max", age = "sum")