1 files changed, 115 insertions, 58 deletions
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 573c915a5c..b3c10de71f 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -23,6 +23,7 @@ NULL
 #' A new \linkS4class{Column} is created to represent the literal value.
 #' If the parameter is a \linkS4class{Column}, it is returned unchanged.
 #'
+#' @param x a literal value or a Column.
 #' @family normal_funcs
 #' @rdname lit
 #' @name lit
@@ -89,8 +90,6 @@ setMethod("acos",
 #' Returns the approximate number of distinct items in a group. This is a column
 #' aggregate function.
 #'
-#' @param x Column to compute on.
-#'
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
 #' @return the approximate number of distinct items in a group.
@@ -171,8 +170,6 @@ setMethod("atan",
 #'
 #' Aggregate function: returns the average of the values in a group.
 #'
-#' @param x Column to compute on.
-#'
 #' @rdname avg
 #' @name avg
 #' @family agg_funcs
@@ -319,7 +316,7 @@ setMethod("column",
 #'
 #' Computes the Pearson Correlation Coefficient for two Columns.
 #'
-#' @param x Column to compute on.
+#' @param col2 a (second) Column.
 #'
 #' @rdname corr
 #' @name corr
@@ -339,8 +336,6 @@ setMethod("corr", signature(x = "Column"),
 #'
 #' Compute the sample covariance between two expressions.
 #'
-#' @param x Column to compute on.
-#'
 #' @rdname cov
 #' @name cov
 #' @family math_funcs
@@ -362,8 +357,8 @@ setMethod("cov", signature(x = "characterOrColumn"),
 
 #' @rdname cov
 #'
-#' @param col1 First column to compute cov_samp.
-#' @param col2 Second column to compute cov_samp.
+#' @param col1 the first Column.
+#' @param col2 the second Column.
 #' @name covar_samp
 #' @aliases covar_samp,characterOrColumn,characterOrColumn-method
 #' @note covar_samp since 2.0.0
@@ -451,9 +446,7 @@ setMethod("cosh",
 #'
 #' Returns the number of items in a group. This is a column aggregate function.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname nrow
+#' @rdname count
 #' @name count
 #' @family agg_funcs
 #' @aliases count,Column-method
@@ -493,6 +486,7 @@ setMethod("crc32",
 #' Calculates the hash code of given columns, and returns the result as a int column.
 #'
 #' @param x Column to compute on.
+#' @param ... additional Column(s) to be included.
 #'
 #' @rdname hash
 #' @name hash
@@ -663,7 +657,8 @@ setMethod("factorial",
 #' The function by default returns the first values it sees. It will return the first non-missing
 #' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
 #'
-#' @param x Column to compute on.
+#' @param na.rm a logical value indicating whether NA values should be stripped
+#'        before the computation proceeds.
 #'
 #' @rdname first
 #' @name first
@@ -832,7 +827,10 @@ setMethod("kurtosis",
 #' The function by default returns the last values it sees. It will return the last non-missing
 #' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
 #'
-#' @param x Column to compute on.
+#' @param x column to compute on.
+#' @param na.rm a logical value indicating whether NA values should be stripped
+#'        before the computation proceeds.
+#' @param ... further arguments to be passed to or from other methods.
 #'
 #' @rdname last
 #' @name last
@@ -1143,7 +1141,7 @@ setMethod("minute",
 #' @export
 #' @examples \dontrun{select(df, monotonically_increasing_id())}
 setMethod("monotonically_increasing_id",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "monotonically_increasing_id")
             column(jc)
@@ -1272,13 +1270,16 @@ setMethod("round",
 
 #' bround
 #'
-#' Returns the value of the column `e` rounded to `scale` decimal places using HALF_EVEN rounding
-#' mode if `scale` >= 0 or at integral part when `scale` < 0.
+#' Returns the value of the column \code{e} rounded to \code{scale} decimal places using HALF_EVEN rounding
+#' mode if \code{scale} >= 0 or at integer part when \code{scale} < 0.
 #' Also known as Gaussian rounding or bankers' rounding that rounds to the nearest even number.
 #' bround(2.5, 0) = 2, bround(3.5, 0) = 4.
 #'
 #' @param x Column to compute on.
-#'
+#' @param scale round to \code{scale} digits to the right of the decimal point when \code{scale} > 0,
+#'        the nearest even number when \code{scale} = 0, and \code{scale} digits to the left
+#'        of the decimal point when \code{scale} < 0.
+#' @param ... further arguments to be passed to or from other methods.
 #' @rdname bround
 #' @name bround
 #' @family math_funcs
@@ -1319,7 +1320,7 @@ setMethod("rtrim",
 #' Aggregate function: alias for \link{stddev_samp}
 #'
 #' @param x Column to compute on.
-#'
+#' @param na.rm currently not used.
 #' @rdname sd
 #' @name sd
 #' @family agg_funcs
@@ -1497,7 +1498,7 @@ setMethod("soundex",
 #' \dontrun{select(df, spark_partition_id())}
 #' @note spark_partition_id since 2.0.0
 setMethod("spark_partition_id",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "spark_partition_id")
             column(jc)
@@ -1560,7 +1561,8 @@ setMethod("stddev_samp",
 #'
 #' Creates a new struct column that composes multiple input columns.
 #'
-#' @param x Column to compute on.
+#' @param x a column to compute on.
+#' @param ... optional column(s) to be included.
 #'
 #' @rdname struct
 #' @name struct
@@ -1831,8 +1833,8 @@ setMethod("upper",
 #'
 #' Aggregate function: alias for \link{var_samp}.
 #'
-#' @param x Column to compute on.
-#'
+#' @param x a Column to compute on.
+#' @param y,na.rm,use currently not used.
 #' @rdname var
 #' @name var
 #' @family agg_funcs
@@ -2114,7 +2116,9 @@ setMethod("pmod", signature(y = "Column"),
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
 #'
+#' @param x Column to compute on.
 #' @param rsd maximum estimation error allowed (default = 0.05)
+#' @param ... further arguments to be passed to or from other methods.
 #'
 #' @aliases approxCountDistinct,Column-method
 #' @export
@@ -2127,7 +2131,7 @@ setMethod("approxCountDistinct",
             column(jc)
           })
 
-#' Count Distinct
+#' Count Distinct Values
 #'
 #' @param x Column to compute on
 #' @param ... other columns
@@ -2156,7 +2160,7 @@ setMethod("countDistinct",
 #' concat
 #'
 #' Concatenates multiple input string columns together into a single string column.
-#' 
+#'
 #' @param x Column to compute on
 #' @param ... other columns
 #'
@@ -2246,7 +2250,6 @@ setMethod("ceiling",
           })
 
 #' @rdname sign
-#' @param x Column to compute on
 #'
 #' @name sign
 #' @aliases sign,Column-method
@@ -2262,9 +2265,6 @@ setMethod("sign", signature(x = "Column"),
 #'
 #' Aggregate function: returns the number of distinct items in a group.
 #'
-#' @param x Column to compute on
-#' @param ... other columns
-#'
 #' @rdname countDistinct
 #' @name n_distinct
 #' @aliases n_distinct,Column-method
@@ -2276,9 +2276,7 @@ setMethod("n_distinct", signature(x = "Column"),
             countDistinct(x, ...)
           })
 
-#' @rdname nrow
-#' @param x Column to compute on
-#'
+#' @rdname count
 #' @name n
 #' @aliases n,Column-method
 #' @export
@@ -2300,8 +2298,8 @@ setMethod("n", signature(x = "Column"),
 #' NOTE: Use when ever possible specialized functions like \code{year}. These benefit from a
 #' specialized implementation.
 #'
-#' @param y Column to compute on
-#' @param x date format specification 
+#' @param y Column to compute on.
+#' @param x date format specification.
 #'
 #' @family datetime_funcs
 #' @rdname date_format
@@ -2320,8 +2318,8 @@ setMethod("date_format", signature(y = "Column", x = "character"),
 #'
 #' Assumes given timestamp is UTC and converts to given timezone.
 #'
-#' @param y Column to compute on
-#' @param x time zone to use 
+#' @param y Column to compute on.
+#' @param x time zone to use.
 #'
 #' @family datetime_funcs
 #' @rdname from_utc_timestamp
@@ -2370,8 +2368,8 @@ setMethod("instr", signature(y = "Column", x = "character"),
 #' Day of the week parameter is case insensitive, and accepts first three or two characters:
 #' "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
 #'
-#' @param y Column to compute on
-#' @param x Day of the week string 
+#' @param y Column to compute on.
+#' @param x Day of the week string.
 #'
 #' @family datetime_funcs
 #' @rdname next_day
@@ -2637,6 +2635,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri
 #' Parses the expression string into the column that it represents, similar to
 #' SparkDataFrame.selectExpr
 #'
+#' @param x an expression character object to be parsed.
 #' @family normal_funcs
 #' @rdname expr
 #' @aliases expr,character-method
@@ -2654,6 +2653,9 @@ setMethod("expr", signature(x = "character"),
 #'
 #' Formats the arguments in printf-style and returns the result as a string column.
 #'
+#' @param format a character object of format strings.
+#' @param x a Column.
+#' @param ... additional Column(s).
 #' @family string_funcs
 #' @rdname format_string
 #' @name format_string
@@ -2676,6 +2678,11 @@ setMethod("format_string", signature(format = "character", x = "Column"),
 #' representing the timestamp of that moment in the current system time zone in the given
 #' format.
 #'
+#' @param x a Column of unix timestamp.
+#' @param format the target format. See
+#'               \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#'               Customizing Formats} for available options.
+#' @param ... further arguments to be passed to or from other methods.
 #' @family datetime_funcs
 #' @rdname from_unixtime
 #' @name from_unixtime
@@ -2702,19 +2709,21 @@ setMethod("from_unixtime", signature(x = "Column"),
 #' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
 #' the order of months are not supported.
 #'
-#' The time column must be of TimestampType.
-#'
-#' Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid
-#' interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
-#' If the `slideDuration` is not provided, the windows will be tumbling windows.
-#'
-#' The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start
-#' window intervals. For example, in order to have hourly tumbling windows that start 15 minutes
-#' past the hour, e.g. 12:15-13:15, 13:15-14:15... provide `startTime` as `15 minutes`.
-#'
-#' The output column will be a struct called 'window' by default with the nested columns 'start'
-#' and 'end'.
-#'
+#' @param x a time Column. Must be of TimestampType.
+#' @param windowDuration a string specifying the width of the window, e.g. '1 second',
+#'                       '1 day 12 hours', '2 minutes'. Valid interval strings are 'week',
+#'                       'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
+#' @param slideDuration a string specifying the sliding interval of the window. Same format as
+#'                      \code{windowDuration}. A new window will be generated every
+#'                      \code{slideDuration}. Must be less than or equal to
+#'                      the \code{windowDuration}.
+#' @param startTime the offset with respect to 1970-01-01 00:00:00 UTC with which to start
+#'                  window intervals. For example, in order to have hourly tumbling windows
+#'                  that start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide
+#'                  \code{startTime} as \code{"15 minutes"}.
+#' @param ... further arguments to be passed to or from other methods.
+#' @return An output column of struct called 'window' by default with the nested columns 'start'
+#'         and 'end'.
 #' @family datetime_funcs
 #' @rdname window
 #' @name window
@@ -2766,6 +2775,10 @@ setMethod("window", signature(x = "Column"),
 #' NOTE: The position is not zero based, but 1 based index, returns 0 if substr
 #' could not be found in str.
 #'
+#' @param substr a character string to be matched.
+#' @param str a Column where matches are sought for each entry.
+#' @param pos start position of search.
+#' @param ... further arguments to be passed to or from other methods.
 #' @family string_funcs
 #' @rdname locate
 #' @aliases locate,character,Column-method
@@ -2785,6 +2798,9 @@ setMethod("locate", signature(substr = "character", str = "Column"),
 #'
 #' Left-pad the string column with
 #'
+#' @param x the string Column to be left-padded.
+#' @param len maximum length of each output result.
+#' @param pad a character string to be padded with.
 #' @family string_funcs
 #' @rdname lpad
 #' @aliases lpad,Column,numeric,character-method
@@ -2804,6 +2820,7 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
 #'
 #' Generate a random column with i.i.d. samples from U[0.0, 1.0].
 #'
+#' @param seed a random seed. Can be missing.
 #' @family normal_funcs
 #' @rdname rand
 #' @name rand
@@ -2832,6 +2849,7 @@ setMethod("rand", signature(seed = "numeric"),
 #'
 #' Generate a column with i.i.d. samples from the standard normal distribution.
 #'
+#' @param seed a random seed. Can be missing.
 #' @family normal_funcs
 #' @rdname randn
 #' @name randn
@@ -2860,6 +2878,9 @@ setMethod("randn", signature(seed = "numeric"),
 #'
 #' Extract a specific(idx) group identified by a java regex, from the specified string column.
 #'
+#' @param x a string Column.
+#' @param pattern a regular expression.
+#' @param idx a group index.
 #' @family string_funcs
 #' @rdname regexp_extract
 #' @name regexp_extract
@@ -2880,6 +2901,9 @@ setMethod("regexp_extract",
 #'
 #' Replace all substrings of the specified string value that match regexp with rep.
 #'
+#' @param x a string Column.
+#' @param pattern a regular expression.
+#' @param replacement a character string that a matched \code{pattern} is replaced with.
 #' @family string_funcs
 #' @rdname regexp_replace
 #' @name regexp_replace
@@ -2900,6 +2924,9 @@ setMethod("regexp_replace",
 #'
 #' Right-padded with pad to a length of len.
 #'
+#' @param x the string Column to be right-padded.
+#' @param len maximum length of each output result.
+#' @param pad a character string to be padded with.
 #' @family string_funcs
 #' @rdname rpad
 #' @name rpad
@@ -2922,6 +2949,11 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
 #' returned. If count is negative, every to the right of the final delimiter (counting from the
 #' right) is returned. substring_index performs a case-sensitive match when searching for delim.
 #'
+#' @param x a Column.
+#' @param delim a delimiter string.
+#' @param count number of occurrences of \code{delim} before the substring is returned.
+#'              A positive number means counting from the left, while negative means
+#'              counting from the right.
 #' @family string_funcs
 #' @rdname substring_index
 #' @aliases substring_index,Column,character,numeric-method
@@ -2949,6 +2981,11 @@ setMethod("substring_index",
 #' The translate will happen when any character in the string matching with the character
 #' in the matchingString.
 #'
+#' @param x a string Column.
+#' @param matchingString a source string where each character will be translated.
+#' @param replaceString a target string where each \code{matchingString} character will
+#'                      be replaced by the character in \code{replaceString}
+#'                      at the same location, if any.
 #' @family string_funcs
 #' @rdname translate
 #' @name translate
@@ -2997,6 +3034,10 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
             column(jc)
           })
 
+#' @param x a Column of date, in string, date or timestamp type.
+#' @param format the target format. See
+#'               \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#'               Customizing Formats} for available options.
 #' @rdname unix_timestamp
 #' @name unix_timestamp
 #' @aliases unix_timestamp,Column,character-method
@@ -3012,6 +3053,8 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
 #' Evaluates a list of conditions and returns one of multiple possible result expressions.
 #' For unmatched expressions null is returned.
 #'
+#' @param condition the condition to test on. Must be a Column expression.
+#' @param value result expression.
 #' @family normal_funcs
 #' @rdname when
 #' @name when
@@ -3033,6 +3076,9 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
 #' Evaluates a list of conditions and returns \code{yes} if the conditions are satisfied.
 #' Otherwise \code{no} is returned for unmatched conditions.
 #'
+#' @param test a Column expression that describes the condition.
+#' @param yes return values for \code{TRUE} elements of test.
+#' @param no return values for \code{FALSE} elements of test.
 #' @family normal_funcs
 #' @rdname ifelse
 #' @name ifelse
@@ -3074,10 +3120,14 @@ setMethod("ifelse",
 #' @family window_funcs
 #' @aliases cume_dist,missing-method
 #' @export
-#' @examples \dontrun{cume_dist()}
+#' @examples \dontrun{
+#'   df <- createDataFrame(iris)
+#'   ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length")
+#'   out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species)
+#' }
 #' @note cume_dist since 1.6.0
 setMethod("cume_dist",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "cume_dist")
             column(jc)
@@ -3101,7 +3151,7 @@ setMethod("cume_dist",
 #' @examples \dontrun{dense_rank()}
 #' @note dense_rank since 1.6.0
 setMethod("dense_rank",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "dense_rank")
             column(jc)
@@ -3115,6 +3165,11 @@ setMethod("dense_rank",
 #'
 #' This is equivalent to the LAG function in SQL.
 #'
+#' @param x the column as a character string or a Column to compute on.
+#' @param offset the number of rows back from the current row from which to obtain a value.
+#'               If not specified, the default is 1.
+#' @param defaultValue default to use when the offset row does not exist.
+#' @param ... further arguments to be passed to or from other methods.
 #' @rdname lag
 #' @name lag
 #' @aliases lag,characterOrColumn-method
@@ -3143,7 +3198,7 @@ setMethod("lag",
 #' an `offset` of one will return the next row at any given point in the window partition.
 #'
 #' This is equivalent to the LEAD function in SQL.
-#' 
+#'
 #' @param x Column to compute on
 #' @param offset Number of rows to offset
 #' @param defaultValue (Optional) default value to use
@@ -3211,7 +3266,7 @@ setMethod("ntile",
 #' @examples \dontrun{percent_rank()}
 #' @note percent_rank since 1.6.0
 setMethod("percent_rank",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "percent_rank")
             column(jc)
@@ -3243,6 +3298,8 @@ setMethod("rank",
           })
 
 # Expose rank() in the R base package
+#' @param x a numeric, complex, character or logical vector.
+#' @param ... additional argument(s) passed to the method.
 #' @name rank
 #' @rdname rank
 #' @aliases rank,ANY-method
@@ -3267,7 +3324,7 @@ setMethod("rank",
 #' @examples \dontrun{row_number()}
 #' @note row_number since 1.6.0
 setMethod("row_number",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "row_number")
             column(jc)
@@ -3318,7 +3375,7 @@ setMethod("explode",
 #' size
 #'
 #' Returns length of array or map.
-#' 
+#'
 #' @param x Column to compute on
 #'
 #' @rdname size