aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/R/functions.R
diff options
context:
space:
mode:
author felixcheung <felixcheung_m@hotmail.com> 2015-11-28 21:02:05 -0800
committer Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-11-28 21:02:05 -0800
commit 28e46ab46368ea3833c8e805163893bbb6f2a265 (patch)
tree 40f106cde4a49f464281d33312d8abb6fa148e28 /R/pkg/R/functions.R
parent 149cd692ee2e127d79386fd8e584f4f70a2906ba (diff)
downloadspark-28e46ab46368ea3833c8e805163893bbb6f2a265.tar.gz
spark-28e46ab46368ea3833c8e805163893bbb6f2a265.tar.bz2
spark-28e46ab46368ea3833c8e805163893bbb6f2a265.zip
[SPARK-12029][SPARKR] Improve column functions signature, param check, tests, fix doc and add examples
shivaram sun-rui Author: felixcheung <felixcheung_m@hotmail.com> Closes #10019 from felixcheung/rfunctionsdoc.
Diffstat (limited to 'R/pkg/R/functions.R')
-rw-r--r-- R/pkg/R/functions.R 121
1 files changed, 91 insertions, 30 deletions
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index e98e7a0117..b30331c61c 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -878,7 +878,7 @@ setMethod("rtrim",
#'}
setMethod("sd",
signature(x = "Column"),
- function(x, na.rm = FALSE) {
+ function(x) {
# In R, sample standard deviation is calculated with the sd() function.
stddev_samp(x)
})
@@ -1250,7 +1250,7 @@ setMethod("upper",
#'}
setMethod("var",
signature(x = "Column"),
- function(x, y = NULL, na.rm = FALSE, use) {
+ function(x) {
# In R, sample variance is calculated with the var() function.
var_samp(x)
})
@@ -1467,6 +1467,7 @@ setMethod("pmod", signature(y = "Column"),
#' @name approxCountDistinct
#' @return the approximate number of distinct items in a group.
#' @export
+#' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
setMethod("approxCountDistinct",
signature(x = "Column"),
function(x, rsd = 0.05) {
@@ -1481,14 +1482,16 @@ setMethod("approxCountDistinct",
#' @name countDistinct
#' @return the number of distinct items in a group.
#' @export
+#' @examples \dontrun{countDistinct(df$c)}
setMethod("countDistinct",
signature(x = "Column"),
function(x, ...) {
- jcol <- lapply(list(...), function (x) {
+ jcols <- lapply(list(...), function (x) {
+ stopifnot(class(x) == "Column")
x@jc
})
jc <- callJStatic("org.apache.spark.sql.functions", "countDistinct", x@jc,
- jcol)
+ jcols)
column(jc)
})
@@ -1501,10 +1504,14 @@ setMethod("countDistinct",
#' @rdname concat
#' @name concat
#' @export
+#' @examples \dontrun{concat(df$strings, df$strings2)}
setMethod("concat",
signature(x = "Column"),
function(x, ...) {
- jcols <- lapply(list(x, ...), function(x) { x@jc })
+ jcols <- lapply(list(x, ...), function (x) {
+ stopifnot(class(x) == "Column")
+ x@jc
+ })
jc <- callJStatic("org.apache.spark.sql.functions", "concat", jcols)
column(jc)
})
@@ -1518,11 +1525,15 @@ setMethod("concat",
#' @rdname greatest
#' @name greatest
#' @export
+#' @examples \dontrun{greatest(df$c, df$d)}
setMethod("greatest",
signature(x = "Column"),
function(x, ...) {
stopifnot(length(list(...)) > 0)
- jcols <- lapply(list(x, ...), function(x) { x@jc })
+ jcols <- lapply(list(x, ...), function (x) {
+ stopifnot(class(x) == "Column")
+ x@jc
+ })
jc <- callJStatic("org.apache.spark.sql.functions", "greatest", jcols)
column(jc)
})
@@ -1530,17 +1541,21 @@ setMethod("greatest",
#' least
#'
#' Returns the least value of the list of column names, skipping null values.
-#' This function takes at least 2 parameters. It will return null iff all parameters are null.
+#' This function takes at least 2 parameters. It will return null if all parameters are null.
#'
#' @family normal_funcs
#' @rdname least
#' @name least
#' @export
+#' @examples \dontrun{least(df$c, df$d)}
setMethod("least",
signature(x = "Column"),
function(x, ...) {
stopifnot(length(list(...)) > 0)
- jcols <- lapply(list(x, ...), function(x) { x@jc })
+ jcols <- lapply(list(x, ...), function (x) {
+ stopifnot(class(x) == "Column")
+ x@jc
+ })
jc <- callJStatic("org.apache.spark.sql.functions", "least", jcols)
column(jc)
})
@@ -1549,11 +1564,10 @@ setMethod("least",
#'
#' Computes the ceiling of the given value.
#'
-#' @family math_funcs
#' @rdname ceil
-#' @name ceil
-#' @aliases ceil
+#' @name ceiling
#' @export
+#' @examples \dontrun{ceiling(df$c)}
setMethod("ceiling",
signature(x = "Column"),
function(x) {
@@ -1564,11 +1578,10 @@ setMethod("ceiling",
#'
#' Computes the signum of the given value.
#'
-#' @family math_funcs
#' @rdname signum
-#' @name signum
-#' @aliases signum
+#' @name sign
#' @export
+#' @examples \dontrun{sign(df$c)}
setMethod("sign", signature(x = "Column"),
function(x) {
signum(x)
@@ -1578,11 +1591,10 @@ setMethod("sign", signature(x = "Column"),
#'
#' Aggregate function: returns the number of distinct items in a group.
#'
-#' @family agg_funcs
#' @rdname countDistinct
-#' @name countDistinct
-#' @aliases countDistinct
+#' @name n_distinct
#' @export
+#' @examples \dontrun{n_distinct(df$c)}
setMethod("n_distinct", signature(x = "Column"),
function(x, ...) {
countDistinct(x, ...)
@@ -1592,11 +1604,10 @@ setMethod("n_distinct", signature(x = "Column"),
#'
#' Aggregate function: returns the number of items in a group.
#'
-#' @family agg_funcs
#' @rdname count
-#' @name count
-#' @aliases count
+#' @name n
#' @export
+#' @examples \dontrun{n(df$c)}
setMethod("n", signature(x = "Column"),
function(x) {
count(x)
@@ -1617,6 +1628,7 @@ setMethod("n", signature(x = "Column"),
#' @rdname date_format
#' @name date_format
#' @export
+#' @examples \dontrun{date_format(df$t, 'MM/dd/yyyy')}
setMethod("date_format", signature(y = "Column", x = "character"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "date_format", y@jc, x)
@@ -1631,6 +1643,7 @@ setMethod("date_format", signature(y = "Column", x = "character"),
#' @rdname from_utc_timestamp
#' @name from_utc_timestamp
#' @export
+#' @examples \dontrun{from_utc_timestamp(df$t, 'PST')}
setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "from_utc_timestamp", y@jc, x)
@@ -1649,6 +1662,7 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
#' @rdname instr
#' @name instr
#' @export
+#' @examples \dontrun{instr(df$c, 'b')}
setMethod("instr", signature(y = "Column", x = "character"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "instr", y@jc, x)
@@ -1663,13 +1677,18 @@ setMethod("instr", signature(y = "Column", x = "character"),
#' For example, \code{next_day('2015-07-27', "Sunday")} returns 2015-08-02 because that is the first
#' Sunday after 2015-07-27.
#'
-#' Day of the week parameter is case insensitive, and accepts:
+#' Day of the week parameter is case insensitive, and accepts first three or two characters:
#' "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
#'
#' @family datetime_funcs
#' @rdname next_day
#' @name next_day
#' @export
+#' @examples
+#'\dontrun{
+#'next_day(df$d, 'Sun')
+#'next_day(df$d, 'Sunday')
+#'}
setMethod("next_day", signature(y = "Column", x = "character"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "next_day", y@jc, x)
@@ -1684,6 +1703,7 @@ setMethod("next_day", signature(y = "Column", x = "character"),
#' @rdname to_utc_timestamp
#' @name to_utc_timestamp
#' @export
+#' @examples \dontrun{to_utc_timestamp(df$t, 'PST')}
setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "to_utc_timestamp", y@jc, x)
@@ -1697,8 +1717,8 @@ setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
#' @name add_months
#' @family datetime_funcs
#' @rdname add_months
-#' @name add_months
#' @export
+#' @examples \dontrun{add_months(df$d, 1)}
setMethod("add_months", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "add_months", y@jc, as.integer(x))
@@ -1713,6 +1733,7 @@ setMethod("add_months", signature(y = "Column", x = "numeric"),
#' @rdname date_add
#' @name date_add
#' @export
+#' @examples \dontrun{date_add(df$d, 1)}
setMethod("date_add", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "date_add", y@jc, as.integer(x))
@@ -1727,6 +1748,7 @@ setMethod("date_add", signature(y = "Column", x = "numeric"),
#' @rdname date_sub
#' @name date_sub
#' @export
+#' @examples \dontrun{date_sub(df$d, 1)}
setMethod("date_sub", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "date_sub", y@jc, as.integer(x))
@@ -1735,16 +1757,19 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"),
#' format_number
#'
-#' Formats numeric column x to a format like '#,###,###.##', rounded to d decimal places,
+#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places,
#' and returns the result as a string column.
#'
-#' If d is 0, the result has no decimal point or fractional part.
-#' If d < 0, the result will be null.'
+#' If x is 0, the result has no decimal point or fractional part.
+#' If x < 0, the result will be null.
#'
+#' @param y column to format
+#' @param x number of decimal places to format to
#' @family string_funcs
#' @rdname format_number
#' @name format_number
#' @export
+#' @examples \dontrun{format_number(df$n, 4)}
setMethod("format_number", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -1764,6 +1789,7 @@ setMethod("format_number", signature(y = "Column", x = "numeric"),
#' @rdname sha2
#' @name sha2
#' @export
+#' @examples \dontrun{sha2(df$c, 256)}
setMethod("sha2", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions", "sha2", y@jc, as.integer(x))
@@ -1779,6 +1805,7 @@ setMethod("sha2", signature(y = "Column", x = "numeric"),
#' @rdname shiftLeft
#' @name shiftLeft
#' @export
+#' @examples \dontrun{shiftLeft(df$c, 1)}
setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -1796,6 +1823,7 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
#' @rdname shiftRight
#' @name shiftRight
#' @export
+#' @examples \dontrun{shiftRight(df$c, 1)}
setMethod("shiftRight", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -1813,6 +1841,7 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"),
#' @rdname shiftRightUnsigned
#' @name shiftRightUnsigned
#' @export
+#' @examples \dontrun{shiftRightUnsigned(df$c, 1)}
setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
function(y, x) {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -1830,6 +1859,7 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
#' @rdname concat_ws
#' @name concat_ws
#' @export
+#' @examples \dontrun{concat_ws('-', df$s, df$d)}
setMethod("concat_ws", signature(sep = "character", x = "Column"),
function(sep, x, ...) {
jcols <- lapply(list(x, ...), function(x) { x@jc })
@@ -1845,6 +1875,7 @@ setMethod("concat_ws", signature(sep = "character", x = "Column"),
#' @rdname conv
#' @name conv
#' @export
+#' @examples \dontrun{conv(df$n, 2, 16)}
setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeric"),
function(x, fromBase, toBase) {
fromBase <- as.integer(fromBase)
@@ -1864,6 +1895,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri
#' @rdname expr
#' @name expr
#' @export
+#' @examples \dontrun{expr('length(name)')}
setMethod("expr", signature(x = "character"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "expr", x)
@@ -1878,6 +1910,7 @@ setMethod("expr", signature(x = "character"),
#' @rdname format_string
#' @name format_string
#' @export
+#' @examples \dontrun{format_string('%d %s', df$a, df$b)}
setMethod("format_string", signature(format = "character", x = "Column"),
function(format, x, ...) {
jcols <- lapply(list(x, ...), function(arg) { arg@jc })
@@ -1897,6 +1930,11 @@ setMethod("format_string", signature(format = "character", x = "Column"),
#' @rdname from_unixtime
#' @name from_unixtime
#' @export
+#' @examples
+#'\dontrun{
+#'from_unixtime(df$t)
+#'from_unixtime(df$t, 'yyyy/MM/dd HH')
+#'}
setMethod("from_unixtime", signature(x = "Column"),
function(x, format = "yyyy-MM-dd HH:mm:ss") {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -1915,6 +1953,7 @@ setMethod("from_unixtime", signature(x = "Column"),
#' @rdname locate
#' @name locate
#' @export
+#' @examples \dontrun{locate('b', df$c, 1)}
setMethod("locate", signature(substr = "character", str = "Column"),
function(substr, str, pos = 0) {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -1931,6 +1970,7 @@ setMethod("locate", signature(substr = "character", str = "Column"),
#' @rdname lpad
#' @name lpad
#' @export
+#' @examples \dontrun{lpad(df$c, 6, '#')}
setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
function(x, len, pad) {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -1947,12 +1987,13 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
#' @rdname rand
#' @name rand
#' @export
+#' @examples \dontrun{rand()}
setMethod("rand", signature(seed = "missing"),
function(seed) {
# This method is dispatched when `seed` is missing, so nothing is forwarded
# beyond the function name: the static `rand` on
# org.apache.spark.sql.functions is invoked with no arguments.
jc <- callJStatic("org.apache.spark.sql.functions", "rand")
# The result of the static call is handed to column(); presumably this wraps
# the backend reference as a Column -- confirm against column()'s definition.
column(jc)
})
-#' @family normal_funcs
+
#' @rdname rand
#' @name rand
#' @export
@@ -1970,12 +2011,13 @@ setMethod("rand", signature(seed = "numeric"),
#' @rdname randn
#' @name randn
#' @export
+#' @examples \dontrun{randn()}
setMethod("randn", signature(seed = "missing"),
function(seed) {
# This method is dispatched when `seed` is missing, so the static `randn` on
# org.apache.spark.sql.functions is invoked with no arguments.
jc <- callJStatic("org.apache.spark.sql.functions", "randn")
# The result of the static call is handed to column(); presumably this wraps
# the backend reference as a Column -- confirm against column()'s definition.
column(jc)
})
-#' @family normal_funcs
+
#' @rdname randn
#' @name randn
#' @export
@@ -1993,6 +2035,7 @@ setMethod("randn", signature(seed = "numeric"),
#' @rdname regexp_extract
#' @name regexp_extract
#' @export
+#' @examples \dontrun{regexp_extract(df$c, '(\\d+)-(\\d+)', 1)}
setMethod("regexp_extract",
signature(x = "Column", pattern = "character", idx = "numeric"),
function(x, pattern, idx) {
@@ -2010,6 +2053,7 @@ setMethod("regexp_extract",
#' @rdname regexp_replace
#' @name regexp_replace
#' @export
+#' @examples \dontrun{regexp_replace(df$c, '(\\d+)', '--')}
setMethod("regexp_replace",
signature(x = "Column", pattern = "character", replacement = "character"),
function(x, pattern, replacement) {
@@ -2027,6 +2071,7 @@ setMethod("regexp_replace",
#' @rdname rpad
#' @name rpad
#' @export
+#' @examples \dontrun{rpad(df$c, 6, '#')}
setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
function(x, len, pad) {
jc <- callJStatic("org.apache.spark.sql.functions",
@@ -2040,12 +2085,17 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
#' Returns the substring from string str before count occurrences of the delimiter delim.
#' If count is positive, everything to the left of the final delimiter (counting from left) is
#' returned. If count is negative, everything to the right of the final delimiter (counting from the
-#' right) is returned. substring <- index performs a case-sensitive match when searching for delim.
+#' right) is returned. substring_index performs a case-sensitive match when searching for delim.
#'
#' @family string_funcs
#' @rdname substring_index
#' @name substring_index
#' @export
+#' @examples
+#'\dontrun{
+#'substring_index(df$c, '.', 2)
+#'substring_index(df$c, '.', -1)
+#'}
setMethod("substring_index",
signature(x = "Column", delim = "character", count = "numeric"),
function(x, delim, count) {
@@ -2066,6 +2116,7 @@ setMethod("substring_index",
#' @rdname translate
#' @name translate
#' @export
+#' @examples \dontrun{translate(df$c, 'rnlt', '123')}
setMethod("translate",
signature(x = "Column", matchingString = "character", replaceString = "character"),
function(x, matchingString, replaceString) {
@@ -2082,12 +2133,18 @@ setMethod("translate",
#' @rdname unix_timestamp
#' @name unix_timestamp
#' @export
+#' @examples
+#'\dontrun{
+#'unix_timestamp()
+#'unix_timestamp(df$t)
+#'unix_timestamp(df$t, 'yyyy-MM-dd HH')
+#'}
setMethod("unix_timestamp", signature(x = "missing", format = "missing"),
function(x, format) {
# This method is dispatched when both `x` and `format` are missing, so the
# static `unix_timestamp` on org.apache.spark.sql.functions is invoked with
# no arguments.
jc <- callJStatic("org.apache.spark.sql.functions", "unix_timestamp")
# The result of the static call is handed to column(); presumably this wraps
# the backend reference as a Column -- confirm against column()'s definition.
column(jc)
})
-#' @family datetime_funcs
+
#' @rdname unix_timestamp
#' @name unix_timestamp
#' @export
@@ -2096,7 +2153,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
jc <- callJStatic("org.apache.spark.sql.functions", "unix_timestamp", x@jc)
column(jc)
})
-#' @family datetime_funcs
+
#' @rdname unix_timestamp
#' @name unix_timestamp
#' @export
@@ -2113,7 +2170,9 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
#' @family normal_funcs
#' @rdname when
#' @name when
+#' @seealso \link{ifelse}
#' @export
+#' @examples \dontrun{when(df$age == 2, df$age + 1)}
setMethod("when", signature(condition = "Column", value = "ANY"),
function(condition, value) {
condition <- condition@jc
@@ -2130,7 +2189,9 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
#' @family normal_funcs
#' @rdname ifelse
#' @name ifelse
+#' @seealso \link{when}
#' @export
+#' @examples \dontrun{ifelse(df$a > 1 & df$b > 2, 0, 1)}
setMethod("ifelse",
signature(test = "Column", yes = "ANY", no = "ANY"),
function(test, yes, no) {