aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst/tests/test_sparkSQL.R
diff options
context:
space:
mode:
authorYu ISHIKAWA <yuu.ishikawa@gmail.com>2015-08-19 10:41:14 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-08-19 10:41:14 -0700
commit2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c (patch)
tree8f6d6309505e5cd3e2bf1380dfed328bcf640318 /R/pkg/inst/tests/test_sparkSQL.R
parentf3e177917dd093873dca286d2419704a561ba44b (diff)
downloadspark-2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c.tar.gz
spark-2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c.tar.bz2
spark-2fcb9cb9552dac1d78dcca5d4d5032b4fa6c985c.zip
[SPARK-9856] [SPARKR] Add expression functions into SparkR whose params are complicated
I added lots of Column functinos into SparkR. And I also added `rand(seed: Int)` and `randn(seed: Int)` in Scala. Since we need such APIs for R integer type. ### JIRA [[SPARK-9856] Add expression functions into SparkR whose params are complicated - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-9856) Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com> Closes #8264 from yu-iskw/SPARK-9856-3.
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r--R/pkg/inst/tests/test_sparkSQL.R98
1 files changed, 92 insertions, 6 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 841de657df..670017ed34 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -598,6 +598,11 @@ test_that("selectExpr() on a DataFrame", {
expect_equal(count(selected2), 3)
})
+test_that("expr() on a DataFrame", {
+ df <- jsonFile(sqlContext, jsonPath)
+ expect_equal(collect(select(df, expr("abs(-123)")))[1, 1], 123)
+})
+
test_that("column calculation", {
df <- jsonFile(sqlContext, jsonPath)
d <- collect(select(df, alias(df$age + 1, "age2")))
@@ -667,16 +672,15 @@ test_that("column functions", {
c <- SparkR:::col("a")
c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
- c3 <- cosh(c) + count(c) + crc32(c) + dayofmonth(c) + dayofyear(c) + exp(c)
+ c3 <- cosh(c) + count(c) + crc32(c) + exp(c)
c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
c5 <- hour(c) + initcap(c) + isNaN(c) + last(c) + last_day(c) + length(c)
c6 <- log(c) + (c) + log1p(c) + log2(c) + lower(c) + ltrim(c) + max(c) + md5(c)
- c7 <- mean(c) + min(c) + minute(c) + month(c) + negate(c) + quarter(c)
- c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + second(c) + sha1(c)
+ c7 <- mean(c) + min(c) + month(c) + negate(c) + quarter(c)
+ c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c)
c9 <- signum(c) + sin(c) + sinh(c) + size(c) + soundex(c) + sqrt(c) + sum(c)
c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
- c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c) + weekofyear(c)
- c12 <- year(c)
+ c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
df <- jsonFile(sqlContext, jsonPath)
df2 <- select(df, between(df$age, c(20, 30)), between(df$age, c(10, 20)))
@@ -689,8 +693,11 @@ test_that("column functions", {
expect_equal(collect(df3)[[1, 1]], TRUE)
expect_equal(collect(df3)[[2, 1]], FALSE)
expect_equal(collect(df3)[[3, 1]], TRUE)
-})
+ df4 <- createDataFrame(sqlContext, list(list(a = "010101")))
+ expect_equal(collect(select(df4, conv(df4$a, 2, 16)))[1, 1], "15")
+})
+#
test_that("column binary mathfunctions", {
lines <- c("{\"a\":1, \"b\":5}",
"{\"a\":2, \"b\":6}",
@@ -709,6 +716,13 @@ test_that("column binary mathfunctions", {
expect_equal(collect(select(df, hypot(df$a, df$b)))[3, "HYPOT(a, b)"], sqrt(3^2 + 7^2))
expect_equal(collect(select(df, hypot(df$a, df$b)))[4, "HYPOT(a, b)"], sqrt(4^2 + 8^2))
## nolint end
+ expect_equal(collect(select(df, shiftLeft(df$b, 1)))[4, 1], 16)
+ expect_equal(collect(select(df, shiftRight(df$b, 1)))[4, 1], 4)
+ expect_equal(collect(select(df, shiftRightUnsigned(df$b, 1)))[4, 1], 4)
+ expect_equal(class(collect(select(df, rand()))[2, 1]), "numeric")
+ expect_equal(collect(select(df, rand(1)))[1, 1], 0.45, tolerance = 0.01)
+ expect_equal(class(collect(select(df, randn()))[2, 1]), "numeric")
+ expect_equal(collect(select(df, randn(1)))[1, 1], -0.0111, tolerance = 0.01)
})
test_that("string operators", {
@@ -718,6 +732,78 @@ test_that("string operators", {
expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30")
expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30")
+ expect_equal(collect(select(df, concat_ws(":", df$name)))[[2, 1]], "Andy")
+ expect_equal(collect(select(df, concat_ws(":", df$name, df$age)))[[2, 1]], "Andy:30")
+ expect_equal(collect(select(df, instr(df$name, "i")))[, 1], c(2, 0, 5))
+ expect_equal(collect(select(df, format_number(df$age, 2)))[2, 1], "30.00")
+ expect_equal(collect(select(df, sha1(df$name)))[2, 1],
+ "ab5a000e88b5d9d0fa2575f5c6263eb93452405d")
+ expect_equal(collect(select(df, sha2(df$name, 256)))[2, 1],
+ "80f2aed3c618c423ddf05a2891229fba44942d907173152442cf6591441ed6dc")
+ expect_equal(collect(select(df, format_string("Name:%s", df$name)))[2, 1], "Name:Andy")
+ expect_equal(collect(select(df, format_string("%s, %d", df$name, df$age)))[2, 1], "Andy, 30")
+ expect_equal(collect(select(df, regexp_extract(df$name, "(n.y)", 1)))[2, 1], "ndy")
+ expect_equal(collect(select(df, regexp_replace(df$name, "(n.y)", "ydn")))[2, 1], "Aydn")
+
+ l2 <- list(list(a = "aaads"))
+ df2 <- createDataFrame(sqlContext, l2)
+ expect_equal(collect(select(df2, locate("aa", df2$a)))[1, 1], 1)
+ expect_equal(collect(select(df2, locate("aa", df2$a, 1)))[1, 1], 2)
+ expect_equal(collect(select(df2, lpad(df2$a, 8, "#")))[1, 1], "###aaads")
+ expect_equal(collect(select(df2, rpad(df2$a, 8, "#")))[1, 1], "aaads###")
+
+ l3 <- list(list(a = "a.b.c.d"))
+ df3 <- createDataFrame(sqlContext, l3)
+ expect_equal(collect(select(df3, substring_index(df3$a, ".", 2)))[1, 1], "a.b")
+ expect_equal(collect(select(df3, substring_index(df3$a, ".", -3)))[1, 1], "b.c.d")
+ expect_equal(collect(select(df3, translate(df3$a, "bc", "12")))[1, 1], "a.1.2.d")
+})
+
+test_that("date functions on a DataFrame", {
+ .originalTimeZone <- Sys.getenv("TZ")
+ Sys.setenv(TZ = "UTC")
+ l <- list(list(a = 1L, b = as.Date("2012-12-13")),
+ list(a = 2L, b = as.Date("2013-12-14")),
+ list(a = 3L, b = as.Date("2014-12-15")))
+ df <- createDataFrame(sqlContext, l)
+ expect_equal(collect(select(df, dayofmonth(df$b)))[, 1], c(13, 14, 15))
+ expect_equal(collect(select(df, dayofyear(df$b)))[, 1], c(348, 348, 349))
+ expect_equal(collect(select(df, weekofyear(df$b)))[, 1], c(50, 50, 51))
+ expect_equal(collect(select(df, year(df$b)))[, 1], c(2012, 2013, 2014))
+ expect_equal(collect(select(df, month(df$b)))[, 1], c(12, 12, 12))
+ expect_equal(collect(select(df, last_day(df$b)))[, 1],
+ c(as.Date("2012-12-31"), as.Date("2013-12-31"), as.Date("2014-12-31")))
+ expect_equal(collect(select(df, next_day(df$b, "MONDAY")))[, 1],
+ c(as.Date("2012-12-17"), as.Date("2013-12-16"), as.Date("2014-12-22")))
+ expect_equal(collect(select(df, date_format(df$b, "y")))[, 1], c("2012", "2013", "2014"))
+ expect_equal(collect(select(df, add_months(df$b, 3)))[, 1],
+ c(as.Date("2013-03-13"), as.Date("2014-03-14"), as.Date("2015-03-15")))
+ expect_equal(collect(select(df, date_add(df$b, 1)))[, 1],
+ c(as.Date("2012-12-14"), as.Date("2013-12-15"), as.Date("2014-12-16")))
+ expect_equal(collect(select(df, date_sub(df$b, 1)))[, 1],
+ c(as.Date("2012-12-12"), as.Date("2013-12-13"), as.Date("2014-12-14")))
+
+ l2 <- list(list(a = 1L, b = as.POSIXlt("2012-12-13 12:34:00", tz = "UTC")),
+ list(a = 2L, b = as.POSIXlt("2014-12-15 01:24:34", tz = "UTC")))
+ df2 <- createDataFrame(sqlContext, l2)
+ expect_equal(collect(select(df2, minute(df2$b)))[, 1], c(34, 24))
+ expect_equal(collect(select(df2, second(df2$b)))[, 1], c(0, 34))
+ expect_equal(collect(select(df2, from_utc_timestamp(df2$b, "JST")))[, 1],
+ c(as.POSIXlt("2012-12-13 21:34:00 UTC"), as.POSIXlt("2014-12-15 10:24:34 UTC")))
+ expect_equal(collect(select(df2, to_utc_timestamp(df2$b, "JST")))[, 1],
+ c(as.POSIXlt("2012-12-13 03:34:00 UTC"), as.POSIXlt("2014-12-14 16:24:34 UTC")))
+ expect_more_than(collect(select(df2, unix_timestamp()))[1, 1], 0)
+ expect_more_than(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0)
+ expect_more_than(collect(select(df2, unix_timestamp(lit("2015-01-01"), "yyyy-MM-dd")))[1, 1], 0)
+
+ l3 <- list(list(a = 1000), list(a = -1000))
+ df3 <- createDataFrame(sqlContext, l3)
+ result31 <- collect(select(df3, from_unixtime(df3$a)))
+ expect_equal(grep("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", result31[, 1], perl = TRUE),
+ c(1, 2))
+ result32 <- collect(select(df3, from_unixtime(df3$a, "yyyy")))
+ expect_equal(grep("\\d{4}", result32[, 1]), c(1, 2))
+ Sys.setenv(TZ = .originalTimeZone)
})
test_that("greatest() and least() on a DataFrame", {