author     Junyang Qian <junyangq@databricks.com>    2016-08-20 06:59:23 -0700
committer  Felix Cheung <felixcheung@apache.org>     2016-08-20 06:59:23 -0700
commit     01401e965b58f7e8ab615764a452d7d18f1d4bf0 (patch)
tree       8929d69ec434607be44617e5aadf7598eed64cd2
parent     39f328ba3519b01940a7d1cdee851ba4e75ef31f (diff)
[SPARK-16508][SPARKR] Fix CRAN undocumented/duplicated arguments warnings.
## What changes were proposed in this pull request?

This PR tries to fix all the remaining "undocumented/duplicated arguments" warnings given by the CRAN check. The one warning left is for the doc of R `stats::glm` exported in SparkR; to mute it, we would also have to document every argument of that non-SparkR function. Some previous conversation is in #14558.

## How was this patch tested?

R unit tests and the `check-cran.sh` script (with no-test).

Author: Junyang Qian <junyangq@databricks.com>

Closes #14705 from junyangq/SPARK-16508-master.
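For reviewers who want to reproduce the warnings locally, a lighter-weight alternative to a full `check-cran.sh` run is the Rd QC helper in base R's `tools` package. The sketch below is an assumption about local setup (SparkR sources under `R/pkg` with roxygen-generated Rd files already built under `man/`); the patch itself was verified with `check-cran.sh` as noted above.

```r
# Sketch: surface the "undocumented/duplicated arguments" warnings without a
# full CRAN check. Assumes SparkR sources under R/pkg and Rd files in R/pkg/man.
library(tools)

# checkDocFiles() cross-checks the \usage and \arguments sections of each Rd
# file; arguments that are missing or documented twice are reported, which is
# the same class of warning that R CMD check --as-cran prints.
print(checkDocFiles(dir = "R/pkg"))
```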
-rw-r--r--  R/pkg/R/DataFrame.R  | 221
-rw-r--r--  R/pkg/R/SQLContext.R |  30
-rw-r--r--  R/pkg/R/WindowSpec.R |  11
-rw-r--r--  R/pkg/R/column.R     |  18
-rw-r--r--  R/pkg/R/functions.R  | 173
-rw-r--r--  R/pkg/R/generics.R   |  62
-rw-r--r--  R/pkg/R/group.R      |   7
-rw-r--r--  R/pkg/R/mllib.R      | 113
-rw-r--r--  R/pkg/R/schema.R     |   5
-rw-r--r--  R/pkg/R/sparkR.R     |  21
-rw-r--r--  R/pkg/R/stats.R      |  25
11 files changed, 419 insertions(+), 267 deletions(-)
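The bulk of the diff below applies one pattern: every argument that appears in a method's signature, including `...`, gets exactly one `@param` entry in the roxygen block that feeds the shared Rd page. A minimal, hypothetical illustration of that pattern follows; the generic `describeRows` is not part of SparkR and the sketch assumes SparkR is attached so the `SparkDataFrame` class exists.

```r
# Hypothetical example (not part of SparkR) of the documentation pattern the
# patch applies: every signature argument, including "...", has a @param entry.
library(methods)

setGeneric("describeRows", function(x, ...) { standardGeneric("describeRows") })

#' Describe the first rows of a SparkDataFrame
#'
#' @param x a SparkDataFrame.
#' @param numRows the number of rows to describe. Defaults to 20.
#' @param ... further arguments to be passed to or from other methods.
#' @rdname describeRows
#' @export
setMethod("describeRows", signature(x = "SparkDataFrame"),
          function(x, numRows = 20, ...) {
            # Dropping the "@param ..." line above is what triggers
            # "Undocumented arguments in documentation object" warnings
            # under R CMD check --as-cran.
            head(x, numRows)
          })
```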
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 09be06de06..540dc3122d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -120,8 +120,9 @@ setMethod("schema",
#'
#' Print the logical and physical Catalyst plans to the console for debugging.
#'
-#' @param x A SparkDataFrame
+#' @param x a SparkDataFrame.
#' @param extended Logical. If extended is FALSE, explain() only prints the physical plan.
+#' @param ... further arguments to be passed to or from other methods.
#' @family SparkDataFrame functions
#' @aliases explain,SparkDataFrame-method
#' @rdname explain
@@ -177,11 +178,13 @@ setMethod("isLocal",
#'
#' Print the first numRows rows of a SparkDataFrame
#'
-#' @param x A SparkDataFrame
-#' @param numRows The number of rows to print. Defaults to 20.
-#' @param truncate Whether truncate long strings. If true, strings more than 20 characters will be
-#' truncated. However, if set greater than zero, truncates strings longer than `truncate`
-#' characters and all cells will be aligned right.
+#' @param x a SparkDataFrame.
+#' @param numRows the number of rows to print. Defaults to 20.
+#' @param truncate whether to truncate long strings. If \code{TRUE}, strings more than
+#' 20 characters will be truncated. However, if set greater than zero,
+#' truncates strings longer than `truncate` characters and all cells
+#' will be aligned right.
+#' @param ... further arguments to be passed to or from other methods.
#' @family SparkDataFrame functions
#' @aliases showDF,SparkDataFrame-method
#' @rdname showDF
@@ -211,7 +214,7 @@ setMethod("showDF",
#'
#' Print the SparkDataFrame column names and types
#'
-#' @param x A SparkDataFrame
+#' @param object a SparkDataFrame.
#'
#' @family SparkDataFrame functions
#' @rdname show
@@ -262,11 +265,11 @@ setMethod("dtypes",
})
})
-#' Column names
+#' Column Names of SparkDataFrame
#'
-#' Return all column names as a list
+#' Return all column names as a list.
#'
-#' @param x A SparkDataFrame
+#' @param x a SparkDataFrame.
#'
#' @family SparkDataFrame functions
#' @rdname columns
@@ -323,6 +326,8 @@ setMethod("colnames",
columns(x)
})
+#' @param value a character vector. Must have the same length as the number
+#' of columns in the SparkDataFrame.
#' @rdname columns
#' @aliases colnames<-,SparkDataFrame-method
#' @name colnames<-
@@ -514,9 +519,10 @@ setMethod("registerTempTable",
#'
#' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
#'
-#' @param x A SparkDataFrame
-#' @param tableName A character vector containing the name of the table
-#' @param overwrite A logical argument indicating whether or not to overwrite
+#' @param x a SparkDataFrame.
+#' @param tableName a character vector containing the name of the table.
+#' @param overwrite a logical argument indicating whether or not to overwrite.
+#' @param ... further arguments to be passed to or from other methods.
#' the existing rows in the table.
#'
#' @family SparkDataFrame functions
@@ -575,7 +581,9 @@ setMethod("cache",
#' supported storage levels, refer to
#' \url{http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence}.
#'
-#' @param x The SparkDataFrame to persist
+#' @param x the SparkDataFrame to persist.
+#' @param newLevel storage level chosen for the persistence. See available options in
+#' the description.
#'
#' @family SparkDataFrame functions
#' @rdname persist
@@ -603,8 +611,9 @@ setMethod("persist",
#' Mark this SparkDataFrame as non-persistent, and remove all blocks for it from memory and
#' disk.
#'
-#' @param x The SparkDataFrame to unpersist
-#' @param blocking Whether to block until all blocks are deleted
+#' @param x the SparkDataFrame to unpersist.
+#' @param blocking whether to block until all blocks are deleted.
+#' @param ... further arguments to be passed to or from other methods.
#'
#' @family SparkDataFrame functions
#' @rdname unpersist-methods
@@ -638,9 +647,10 @@ setMethod("unpersist",
#' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
#' using `spark.sql.shuffle.partitions` as number of partitions.}
#'}
-#' @param x A SparkDataFrame
-#' @param numPartitions The number of partitions to use.
-#' @param col The column by which the partitioning will be performed.
+#' @param x a SparkDataFrame.
+#' @param numPartitions the number of partitions to use.
+#' @param col the column by which the partitioning will be performed.
+#' @param ... additional column(s) to be used in the partitioning.
#'
#' @family SparkDataFrame functions
#' @rdname repartition
@@ -919,11 +929,10 @@ setMethod("sample_frac",
#' Returns the number of rows in a SparkDataFrame
#'
-#' @param x A SparkDataFrame
-#'
+#' @param x a SparkDataFrame.
#' @family SparkDataFrame functions
#' @rdname nrow
-#' @name count
+#' @name nrow
#' @aliases count,SparkDataFrame-method
#' @export
#' @examples
@@ -999,9 +1008,10 @@ setMethod("dim",
#' Collects all the elements of a SparkDataFrame and coerces them into an R data.frame.
#'
-#' @param x A SparkDataFrame
-#' @param stringsAsFactors (Optional) A logical indicating whether or not string columns
+#' @param x a SparkDataFrame.
+#' @param stringsAsFactors (Optional) a logical indicating whether or not string columns
#' should be converted to factors. FALSE by default.
+#' @param ... further arguments to be passed to or from other methods.
#'
#' @family SparkDataFrame functions
#' @rdname collect
@@ -1096,8 +1106,10 @@ setMethod("limit",
dataFrame(res)
})
-#' Take the first NUM rows of a SparkDataFrame and return a the results as a R data.frame
+#' Take the first NUM rows of a SparkDataFrame and return the results as an R data.frame
#'
+#' @param x a SparkDataFrame.
+#' @param num number of rows to take.
#' @family SparkDataFrame functions
#' @rdname take
#' @name take
@@ -1124,9 +1136,9 @@ setMethod("take",
#' then head() returns the first 6 rows in keeping with the current data.frame
#' convention in R.
#'
-#' @param x A SparkDataFrame
-#' @param num The number of rows to return. Default is 6.
-#' @return A data.frame
+#' @param x a SparkDataFrame.
+#' @param num the number of rows to return. Default is 6.
+#' @return A data.frame.
#'
#' @family SparkDataFrame functions
#' @aliases head,SparkDataFrame-method
@@ -1150,7 +1162,8 @@ setMethod("head",
#' Return the first row of a SparkDataFrame
#'
-#' @param x A SparkDataFrame
+#' @param x a SparkDataFrame or a column used in aggregation function.
+#' @param ... further arguments to be passed to or from other methods.
#'
#' @family SparkDataFrame functions
#' @aliases first,SparkDataFrame-method
@@ -1201,8 +1214,9 @@ setMethod("toRDD",
#'
#' Groups the SparkDataFrame using the specified columns, so we can run aggregation on them.
#'
-#' @param x a SparkDataFrame
-#' @return a GroupedData
+#' @param x a SparkDataFrame.
+#' @param ... variable(s) (character name(s) or Column(s)) to group on.
+#' @return A GroupedData.
#' @family SparkDataFrame functions
#' @aliases groupBy,SparkDataFrame-method
#' @rdname groupBy
@@ -1244,7 +1258,6 @@ setMethod("group_by",
#'
#' Compute aggregates by specifying a list of columns
#'
-#' @param x a SparkDataFrame
#' @family SparkDataFrame functions
#' @aliases agg,SparkDataFrame-method
#' @rdname summarize
@@ -1391,16 +1404,15 @@ setMethod("dapplyCollect",
#' Groups the SparkDataFrame using the specified columns and applies the R function to each
#' group.
#'
-#' @param x A SparkDataFrame
-#' @param cols Grouping columns
-#' @param func A function to be applied to each group partition specified by grouping
+#' @param cols grouping columns.
+#' @param func a function to be applied to each group partition specified by grouping
#' column of the SparkDataFrame. The function `func` takes as argument
#' a key - grouping columns and a data frame - a local R data.frame.
#' The output of `func` is a local R data.frame.
-#' @param schema The schema of the resulting SparkDataFrame after the function is applied.
+#' @param schema the schema of the resulting SparkDataFrame after the function is applied.
#' The schema must match to output of `func`. It has to be defined for each
#' output column with preferred output column name and corresponding data type.
-#' @return a SparkDataFrame
+#' @return A SparkDataFrame.
#' @family SparkDataFrame functions
#' @aliases gapply,SparkDataFrame-method
#' @rdname gapply
@@ -1483,13 +1495,12 @@ setMethod("gapply",
#' Groups the SparkDataFrame using the specified columns, applies the R function to each
#' group and collects the result back to R as data.frame.
#'
-#' @param x A SparkDataFrame
-#' @param cols Grouping columns
-#' @param func A function to be applied to each group partition specified by grouping
+#' @param cols grouping columns.
+#' @param func a function to be applied to each group partition specified by grouping
#' column of the SparkDataFrame. The function `func` takes as argument
#' a key - grouping columns and a data frame - a local R data.frame.
#' The output of `func` is a local R data.frame.
-#' @return a data.frame
+#' @return A data.frame.
#' @family SparkDataFrame functions
#' @aliases gapplyCollect,SparkDataFrame-method
#' @rdname gapplyCollect
@@ -1636,6 +1647,7 @@ getColumn <- function(x, c) {
column(callJMethod(x@sdf, "col", c))
}
+#' @param name name of a Column (without being wrapped by \code{""}).
#' @rdname select
#' @name $
#' @aliases $,SparkDataFrame-method
@@ -1645,6 +1657,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
getColumn(x, name)
})
+#' @param value a Column or NULL. If NULL, the specified Column is dropped.
#' @rdname select
#' @name $<-
#' @aliases $<-,SparkDataFrame-method
@@ -1719,12 +1732,13 @@ setMethod("[", signature(x = "SparkDataFrame"),
#' Subset
#'
#' Return subsets of SparkDataFrame according to given conditions
-#' @param x A SparkDataFrame
-#' @param subset (Optional) A logical expression to filter on rows
-#' @param select expression for the single Column or a list of columns to select from the SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param i,subset (Optional) a logical expression to filter on rows.
+#' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
#' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
-#' Otherwise, a SparkDataFrame will always be returned.
-#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns
+#' Otherwise, a SparkDataFrame will always be returned.
+#' @param ... currently not used.
+#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns.
#' @export
#' @family SparkDataFrame functions
#' @aliases subset,SparkDataFrame-method
@@ -1759,9 +1773,12 @@ setMethod("subset", signature(x = "SparkDataFrame"),
#' Select
#'
#' Selects a set of columns with names or Column expressions.
-#' @param x A SparkDataFrame
-#' @param col A list of columns or single Column or name
-#' @return A new SparkDataFrame with selected columns
+#' @param x a SparkDataFrame.
+#' @param col a list of columns or single Column or name.
+#' @param ... additional column(s) if only one column is specified in \code{col}.
+#' If more than one column is assigned in \code{col}, \code{...}
+#' should be left empty.
+#' @return A new SparkDataFrame with selected columns.
#' @export
#' @family SparkDataFrame functions
#' @rdname select
@@ -1858,9 +1875,9 @@ setMethod("selectExpr",
#' Return a new SparkDataFrame by adding a column or replacing the existing column
#' that has the same name.
#'
-#' @param x A SparkDataFrame
-#' @param colName A column name.
-#' @param col A Column expression.
+#' @param x a SparkDataFrame.
+#' @param colName a column name.
+#' @param col a Column expression.
#' @return A SparkDataFrame with the new column added or the existing column replaced.
#' @family SparkDataFrame functions
#' @aliases withColumn,SparkDataFrame,character,Column-method
@@ -1889,8 +1906,8 @@ setMethod("withColumn",
#'
#' Return a new SparkDataFrame with the specified columns added or replaced.
#'
-#' @param .data A SparkDataFrame
-#' @param col a named argument of the form name = col
+#' @param .data a SparkDataFrame.
+#' @param ... additional column argument(s) each in the form name = col.
#' @return A new SparkDataFrame with the new columns added or replaced.
#' @family SparkDataFrame functions
#' @aliases mutate,SparkDataFrame-method
@@ -1967,6 +1984,7 @@ setMethod("mutate",
do.call(select, c(x, colList, deDupCols))
})
+#' @param _data a SparkDataFrame.
#' @export
#' @rdname mutate
#' @aliases transform,SparkDataFrame-method
@@ -2278,11 +2296,18 @@ setMethod("join",
#' specified, the common column names in \code{x} and \code{y} will be used.
#' @param by.x a character vector specifying the joining columns for x.
#' @param by.y a character vector specifying the joining columns for y.
+#' @param all a boolean value setting \code{all.x} and \code{all.y}
+#' if any of them are unset.
#' @param all.x a boolean value indicating whether all the rows in x should
#' be including in the join
#' @param all.y a boolean value indicating whether all the rows in y should
#' be including in the join
#' @param sort a logical argument indicating whether the resulting columns should be sorted
+#' @param suffixes a string vector of length 2 used to make colnames of
+#' \code{x} and \code{y} unique.
+#' The first element is appended to each colname of \code{x}.
+#' The second element is appended to each colname of \code{y}.
+#' @param ... additional argument(s) passed to the method.
#' @details If all.x and all.y are set to FALSE, a natural join will be returned. If
#' all.x is set to TRUE and all.y is set to FALSE, a left outer join will
#' be returned. If all.x is set to FALSE and all.y is set to TRUE, a right
@@ -2311,7 +2336,7 @@ setMethod("merge",
signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by,
all = FALSE, all.x = all, all.y = all,
- sort = TRUE, suffixes = c("_x", "_y"), ... ) {
+ sort = TRUE, suffixes = c("_x", "_y"), ...) {
if (length(suffixes) != 2) {
stop("suffixes must have length 2")
@@ -2464,8 +2489,10 @@ setMethod("unionAll",
#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
#' Note that this does not remove duplicate rows across the two SparkDataFrames.
#'
-#' @param x A SparkDataFrame
-#' @param ... Additional SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param ... additional SparkDataFrame(s).
+#' @param deparse.level currently not used (put here to match the signature of
+#' the base implementation).
#' @return A SparkDataFrame containing the result of the union.
#' @family SparkDataFrame functions
#' @aliases rbind,SparkDataFrame-method
@@ -2522,8 +2549,8 @@ setMethod("intersect",
#' Return a new SparkDataFrame containing rows in this SparkDataFrame
#' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL.
#'
-#' @param x A SparkDataFrame
-#' @param y A SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
#' @return A SparkDataFrame containing the result of the except operation.
#' @family SparkDataFrame functions
#' @aliases except,SparkDataFrame,SparkDataFrame-method
@@ -2564,10 +2591,11 @@ setMethod("except",
#' and to not change the existing data.
#' }
#'
-#' @param df A SparkDataFrame
-#' @param path A name for the table
-#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param df a SparkDataFrame.
+#' @param path a name for the table.
+#' @param source a name for external data source.
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param ... additional argument(s) passed to the method.
#'
#' @family SparkDataFrame functions
#' @aliases write.df,SparkDataFrame,character-method
@@ -2626,10 +2654,11 @@ setMethod("saveDF",
#' ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data. \cr
#'
-#' @param df A SparkDataFrame
-#' @param tableName A name for the table
-#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param df a SparkDataFrame.
+#' @param tableName a name for the table.
+#' @param source a name for external data source.
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default).
+#' @param ... additional option(s) passed to the method.
#'
#' @family SparkDataFrame functions
#' @aliases saveAsTable,SparkDataFrame,character-method
@@ -2665,10 +2694,10 @@ setMethod("saveAsTable",
#' Computes statistics for numeric and string columns.
#' If no columns are given, this function computes statistics for all numerical or string columns.
#'
-#' @param x A SparkDataFrame to be computed.
-#' @param col A string of name
-#' @param ... Additional expressions
-#' @return A SparkDataFrame
+#' @param x a SparkDataFrame to be computed.
+#' @param col a string of name.
+#' @param ... additional expressions.
+#' @return A SparkDataFrame.
#' @family SparkDataFrame functions
#' @aliases describe,SparkDataFrame,character-method describe,SparkDataFrame,ANY-method
#' @rdname summary
@@ -2703,6 +2732,7 @@ setMethod("describe",
dataFrame(sdf)
})
+#' @param object a SparkDataFrame to be summarized.
#' @rdname summary
#' @name summary
#' @aliases summary,SparkDataFrame-method
@@ -2718,16 +2748,20 @@ setMethod("summary",
#'
#' dropna, na.omit - Returns a new SparkDataFrame omitting rows with null values.
#'
-#' @param x A SparkDataFrame.
+#' @param x a SparkDataFrame.
#' @param how "any" or "all".
#' if "any", drop a row if it contains any nulls.
#' if "all", drop a row only if all its values are null.
#' if minNonNulls is specified, how is ignored.
-#' @param minNonNulls If specified, drop rows that have less than
+#' @param minNonNulls if specified, drop rows that have less than
#' minNonNulls non-null values.
#' This overwrites the how parameter.
-#' @param cols Optional list of column names to consider.
-#' @return A SparkDataFrame
+#' @param cols optional list of column names to consider. In `fillna`,
+#' columns specified in cols that do not have matching data
+#' type are ignored. For example, if value is a character, and
+#' subset contains a non-character column, then the non-character
+#' column is simply ignored.
+#' @return A SparkDataFrame.
#'
#' @family SparkDataFrame functions
#' @rdname nafunctions
@@ -2759,6 +2793,8 @@ setMethod("dropna",
dataFrame(sdf)
})
+#' @param object a SparkDataFrame.
+#' @param ... further arguments to be passed to or from other methods.
#' @rdname nafunctions
#' @name na.omit
#' @aliases na.omit,SparkDataFrame-method
@@ -2772,18 +2808,12 @@ setMethod("na.omit",
#' fillna - Replace null values.
#'
-#' @param x A SparkDataFrame.
-#' @param value Value to replace null values with.
+#' @param value value to replace null values with.
#' Should be an integer, numeric, character or named list.
#' If the value is a named list, then cols is ignored and
#' value must be a mapping from column name (character) to
#' replacement value. The replacement value must be an
#' integer, numeric or character.
-#' @param cols optional list of column names to consider.
-#' Columns specified in cols that do not have matching data
-#' type are ignored. For example, if value is a character, and
-#' subset contains a non-character column, then the non-character
-#' column is simply ignored.
#'
#' @rdname nafunctions
#' @name fillna
@@ -2848,8 +2878,11 @@ setMethod("fillna",
#' Since data.frames are held in memory, ensure that you have enough memory
#' in your system to accommodate the contents.
#'
-#' @param x a SparkDataFrame
-#' @return a data.frame
+#' @param x a SparkDataFrame.
+#' @param row.names NULL or a character vector giving the row names for the data frame.
+#' @param optional If `TRUE`, converting column names is optional.
+#' @param ... additional arguments to pass to base::as.data.frame.
+#' @return A data.frame.
#' @family SparkDataFrame functions
#' @aliases as.data.frame,SparkDataFrame-method
#' @rdname as.data.frame
@@ -3003,9 +3036,10 @@ setMethod("str",
#' Returns a new SparkDataFrame with columns dropped.
#' This is a no-op if schema doesn't contain column name(s).
#'
-#' @param x A SparkDataFrame.
-#' @param cols A character vector of column names or a Column.
-#' @return A SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param col a character vector of column names or a Column.
+#' @param ... further arguments to be passed to or from other methods.
+#' @return A SparkDataFrame.
#'
#' @family SparkDataFrame functions
#' @rdname drop
@@ -3024,7 +3058,7 @@ setMethod("str",
#' @note drop since 2.0.0
setMethod("drop",
signature(x = "SparkDataFrame"),
- function(x, col) {
+ function(x, col, ...) {
stopifnot(class(col) == "character" || class(col) == "Column")
if (class(col) == "Column") {
@@ -3052,8 +3086,8 @@ setMethod("drop",
#'
#' @name histogram
#' @param nbins the number of bins (optional). Default value is 10.
+#' @param col the column, as a character string or a Column, to build the histogram from.
#' @param df the SparkDataFrame containing the Column to build the histogram from.
-#' @param colname the name of the column to build the histogram from.
#' @return a data.frame with the histogram statistics, i.e., counts and centroids.
#' @rdname histogram
#' @aliases histogram,SparkDataFrame,characterOrColumn-method
@@ -3184,10 +3218,11 @@ setMethod("histogram",
#' and to not change the existing data.
#' }
#'
-#' @param x A SparkDataFrame
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`
-#' @param tableName The name of the table in the external database
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param x a SparkDataFrame.
+#' @param url JDBC database url of the form `jdbc:subprotocol:subname`.
+#' @param tableName the name of the table in the external database.
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default).
+#' @param ... additional JDBC database connection properties.
#' @family SparkDataFrame functions
#' @rdname write.jdbc
#' @name write.jdbc
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 0c06bba639..a9cd2d85f8 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -165,9 +165,9 @@ getDefaultSqlSource <- function() {
#'
#' Converts R data.frame or list into SparkDataFrame.
#'
-#' @param data An RDD or list or data.frame
-#' @param schema a list of column names or named list (StructType), optional
-#' @return a SparkDataFrame
+#' @param data an RDD or list or data.frame.
+#' @param schema a list of column names or named list (StructType), optional.
+#' @return A SparkDataFrame.
#' @rdname createDataFrame
#' @export
#' @examples
@@ -257,23 +257,25 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) {
}
createDataFrame <- function(x, ...) {
- dispatchFunc("createDataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...)
+ dispatchFunc("createDataFrame(data, schema = NULL)", x, ...)
}
+#' @param samplingRatio Currently not used.
#' @rdname createDataFrame
#' @aliases createDataFrame
#' @export
#' @method as.DataFrame default
#' @note as.DataFrame since 1.6.0
as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) {
- createDataFrame(data, schema, samplingRatio)
+ createDataFrame(data, schema)
}
+#' @param ... additional argument(s).
#' @rdname createDataFrame
#' @aliases as.DataFrame
#' @export
-as.DataFrame <- function(x, ...) {
- dispatchFunc("as.DataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...)
+as.DataFrame <- function(data, ...) {
+ dispatchFunc("as.DataFrame(data, schema = NULL)", data, ...)
}
#' toDF
@@ -398,7 +400,7 @@ read.orc <- function(path) {
#'
#' Loads a Parquet file, returning the result as a SparkDataFrame.
#'
-#' @param path Path of file to read. A vector of multiple paths is allowed.
+#' @param path path of file to read. A vector of multiple paths is allowed.
#' @return SparkDataFrame
#' @rdname read.parquet
#' @export
@@ -418,6 +420,7 @@ read.parquet <- function(x, ...) {
dispatchFunc("read.parquet(...)", x, ...)
}
+#' @param ... argument(s) passed to the method.
#' @rdname read.parquet
#' @name parquetFile
#' @export
@@ -727,6 +730,7 @@ dropTempView <- function(viewName) {
#' @param source The name of external data source
#' @param schema The data schema defined in structType
#' @param na.strings Default string value for NA when source is "csv"
+#' @param ... additional external data source specific named properties.
#' @return SparkDataFrame
#' @rdname read.df
#' @name read.df
@@ -791,10 +795,11 @@ loadDF <- function(x, ...) {
#' If `source` is not specified, the default data source configured by
#' "spark.sql.sources.default" will be used.
#'
-#' @param tableName A name of the table
-#' @param path The path of files to load
-#' @param source the name of external data source
-#' @return SparkDataFrame
+#' @param tableName a name of the table.
+#' @param path the path of files to load.
+#' @param source the name of external data source.
+#' @param ... additional argument(s) passed to the method.
+#' @return A SparkDataFrame.
#' @rdname createExternalTable
#' @export
#' @examples
@@ -840,6 +845,7 @@ createExternalTable <- function(x, ...) {
#' clause expressions used to split the column `partitionColumn` evenly.
#' This defaults to SparkContext.defaultParallelism when unset.
#' @param predicates a list of conditions in the where clause; each one defines one partition
+#' @param ... additional JDBC database connection named properties.
#' @return SparkDataFrame
#' @rdname read.jdbc
#' @name read.jdbc
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index 751ba3fde9..b55356b07d 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -54,8 +54,10 @@ setMethod("show", "WindowSpec",
#'
#' Defines the partitioning columns in a WindowSpec.
#'
-#' @param x a WindowSpec
-#' @return a WindowSpec
+#' @param x a WindowSpec.
+#' @param col a column to partition on (described by the name or Column).
+#' @param ... additional column(s) to partition on.
+#' @return A WindowSpec.
#' @rdname partitionBy
#' @name partitionBy
#' @aliases partitionBy,WindowSpec-method
@@ -86,7 +88,7 @@ setMethod("partitionBy",
#'
#' Defines the ordering columns in a WindowSpec.
#' @param x a WindowSpec
-#' @param col a character or Column object indicating an ordering column
+#' @param col a character or Column indicating an ordering column
#' @param ... additional sorting fields
#' @return A WindowSpec.
#' @name orderBy
@@ -192,6 +194,9 @@ setMethod("rangeBetween",
#'
#' Define a windowing column.
#'
+#' @param x a Column, usually one returned by window function(s).
+#' @param window a WindowSpec object. Can be created by `windowPartitionBy` or
+#' `windowOrderBy` and configured by other WindowSpec methods.
#' @rdname over
#' @name over
#' @aliases over,Column,WindowSpec-method
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 0edb9d2ae5..af486e1ce2 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -163,8 +163,9 @@ setMethod("alias",
#' @family colum_func
#' @aliases substr,Column-method
#'
-#' @param start starting position
-#' @param stop ending position
+#' @param x a Column.
+#' @param start starting position.
+#' @param stop ending position.
#' @note substr since 1.4.0
setMethod("substr", signature(x = "Column"),
function(x, start, stop) {
@@ -219,6 +220,7 @@ setMethod("endsWith", signature(x = "Column"),
#' @family colum_func
#' @aliases between,Column-method
#'
+#' @param x a Column
#' @param bounds lower and upper bounds
#' @note between since 1.5.0
setMethod("between", signature(x = "Column"),
@@ -233,6 +235,11 @@ setMethod("between", signature(x = "Column"),
#' Casts the column to a different data type.
#'
+#' @param x a Column.
+#' @param dataType a character object describing the target data type.
+#' See
+#' \href{https://spark.apache.org/docs/latest/sparkr.html#data-type-mapping-between-r-and-spark}{
+#' Spark Data Types} for available data types.
#' @rdname cast
#' @name cast
#' @family colum_func
@@ -254,10 +261,12 @@ setMethod("cast",
#' Match a column with given values.
#'
+#' @param x a Column.
+#' @param table a collection of values (coercible to list) to compare with.
#' @rdname match
#' @name %in%
#' @aliases %in%,Column-method
-#' @return a matched values as a result of comparing with given values.
+#' @return The matched values as a result of comparing with given values.
#' @export
#' @examples
#' \dontrun{
@@ -277,6 +286,9 @@ setMethod("%in%",
#' If values in the specified column are null, returns the value.
#' Can be used in conjunction with `when` to specify a default value for expressions.
#'
+#' @param x a Column.
+#' @param value value to replace when the corresponding entry in \code{x} is NA.
+#' Can be a single value or a Column.
#' @rdname otherwise
#' @name otherwise
#' @family colum_func
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 573c915a5c..b3c10de71f 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -23,6 +23,7 @@ NULL
#' A new \linkS4class{Column} is created to represent the literal value.
#' If the parameter is a \linkS4class{Column}, it is returned unchanged.
#'
+#' @param x a literal value or a Column.
#' @family normal_funcs
#' @rdname lit
#' @name lit
@@ -89,8 +90,6 @@ setMethod("acos",
#' Returns the approximate number of distinct items in a group. This is a column
#' aggregate function.
#'
-#' @param x Column to compute on.
-#'
#' @rdname approxCountDistinct
#' @name approxCountDistinct
#' @return the approximate number of distinct items in a group.
@@ -171,8 +170,6 @@ setMethod("atan",
#'
#' Aggregate function: returns the average of the values in a group.
#'
-#' @param x Column to compute on.
-#'
#' @rdname avg
#' @name avg
#' @family agg_funcs
@@ -319,7 +316,7 @@ setMethod("column",
#'
#' Computes the Pearson Correlation Coefficient for two Columns.
#'
-#' @param x Column to compute on.
+#' @param col2 a (second) Column.
#'
#' @rdname corr
#' @name corr
@@ -339,8 +336,6 @@ setMethod("corr", signature(x = "Column"),
#'
#' Compute the sample covariance between two expressions.
#'
-#' @param x Column to compute on.
-#'
#' @rdname cov
#' @name cov
#' @family math_funcs
@@ -362,8 +357,8 @@ setMethod("cov", signature(x = "characterOrColumn"),
#' @rdname cov
#'
-#' @param col1 First column to compute cov_samp.
-#' @param col2 Second column to compute cov_samp.
+#' @param col1 the first Column.
+#' @param col2 the second Column.
#' @name covar_samp
#' @aliases covar_samp,characterOrColumn,characterOrColumn-method
#' @note covar_samp since 2.0.0
@@ -451,9 +446,7 @@ setMethod("cosh",
#'
#' Returns the number of items in a group. This is a column aggregate function.
#'
-#' @param x Column to compute on.
-#'
-#' @rdname nrow
+#' @rdname count
#' @name count
#' @family agg_funcs
#' @aliases count,Column-method
@@ -493,6 +486,7 @@ setMethod("crc32",
#' Calculates the hash code of given columns, and returns the result as a int column.
#'
#' @param x Column to compute on.
+#' @param ... additional Column(s) to be included.
#'
#' @rdname hash
#' @name hash
@@ -663,7 +657,8 @@ setMethod("factorial",
#' The function by default returns the first values it sees. It will return the first non-missing
#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
#'
-#' @param x Column to compute on.
+#' @param na.rm a logical value indicating whether NA values should be stripped
+#' before the computation proceeds.
#'
#' @rdname first
#' @name first
@@ -832,7 +827,10 @@ setMethod("kurtosis",
#' The function by default returns the last values it sees. It will return the last non-missing
#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
#'
-#' @param x Column to compute on.
+#' @param x column to compute on.
+#' @param na.rm a logical value indicating whether NA values should be stripped
+#' before the computation proceeds.
+#' @param ... further arguments to be passed to or from other methods.
#'
#' @rdname last
#' @name last
@@ -1143,7 +1141,7 @@ setMethod("minute",
#' @export
#' @examples \dontrun{select(df, monotonically_increasing_id())}
setMethod("monotonically_increasing_id",
- signature(x = "missing"),
+ signature("missing"),
function() {
jc <- callJStatic("org.apache.spark.sql.functions", "monotonically_increasing_id")
column(jc)
@@ -1272,13 +1270,16 @@ setMethod("round",
#' bround
#'
-#' Returns the value of the column `e` rounded to `scale` decimal places using HALF_EVEN rounding
-#' mode if `scale` >= 0 or at integral part when `scale` < 0.
+#' Returns the value of the column \code{e} rounded to \code{scale} decimal places using HALF_EVEN rounding
+#' mode if \code{scale} >= 0 or at integer part when \code{scale} < 0.
#' Also known as Gaussian rounding or bankers' rounding that rounds to the nearest even number.
#' bround(2.5, 0) = 2, bround(3.5, 0) = 4.
#'
#' @param x Column to compute on.
-#'
+#' @param scale round to \code{scale} digits to the right of the decimal point when \code{scale} > 0,
+#' the nearest even number when \code{scale} = 0, and \code{scale} digits to the left
+#' of the decimal point when \code{scale} < 0.
+#' @param ... further arguments to be passed to or from other methods.
#' @rdname bround
#' @name bround
#' @family math_funcs
@@ -1319,7 +1320,7 @@ setMethod("rtrim",
#' Aggregate function: alias for \link{stddev_samp}
#'
#' @param x Column to compute on.
-#'
+#' @param na.rm currently not used.
#' @rdname sd
#' @name sd
#' @family agg_funcs
@@ -1497,7 +1498,7 @@ setMethod("soundex",
#' \dontrun{select(df, spark_partition_id())}
#' @note spark_partition_id since 2.0.0
setMethod("spark_partition_id",
- signature(x = "missing"),
+ signature("missing"),
function() {
jc <- callJStatic("org.apache.spark.sql.functions", "spark_partition_id")
column(jc)
@@ -1560,7 +1561,8 @@ setMethod("stddev_samp",
#'
#' Creates a new struct column that composes multiple input columns.
#'
-#' @param x Column to compute on.
+#' @param x a column to compute on.
+#' @param ... optional column(s) to be included.
#'
#' @rdname struct
#' @name struct
@@ -1831,8 +1833,8 @@ setMethod("upper",
#'
#' Aggregate function: alias for \link{var_samp}.
#'
-#' @param x Column to compute on.
-#'
+#' @param x a Column to compute on.
+#' @param y,na.rm,use currently not used.
#' @rdname var
#' @name var
#' @family agg_funcs
@@ -2114,7 +2116,9 @@ setMethod("pmod", signature(y = "Column"),
#' @rdname approxCountDistinct
#' @name approxCountDistinct
#'
+#' @param x Column to compute on.
#' @param rsd maximum estimation error allowed (default = 0.05)
+#' @param ... further arguments to be passed to or from other methods.
#'
#' @aliases approxCountDistinct,Column-method
#' @export
@@ -2127,7 +2131,7 @@ setMethod("approxCountDistinct",
column(jc)
})
-#' Count Distinct
+#' Count Distinct Values
#'
#' @param x Column to compute on
#' @param ... other columns
@@ -2156,7 +2160,7 @@ setMethod("countDistinct",
#' concat
#'
#' Concatenates multiple input string columns together into a single string column.
-#'
+#'
#' @param x Column to compute on
#' @param ... other columns
#'
@@ -2246,7 +2250,6 @@ setMethod("ceiling",
})
#' @rdname sign
-#' @param x Column to compute on
#'
#' @name sign
#' @aliases sign,Column-method
@@ -2262,9 +2265,6 @@ setMethod("sign", signature(x = "Column"),
#'
#' Aggregate function: returns the number of distinct items in a group.
#'
-#' @param x Column to compute on
-#' @param ... other columns
-#'
#' @rdname countDistinct
#' @name n_distinct
#' @aliases n_distinct,Column-method
@@ -2276,9 +2276,7 @@ setMethod("n_distinct", signature(x = "Column"),
countDistinct(x, ...)
})
-#' @rdname nrow
-#' @param x Column to compute on
-#'
+#' @rdname count
#' @name n
#' @aliases n,Column-method
#' @export
@@ -2300,8 +2298,8 @@ setMethod("n", signature(x = "Column"),
#' NOTE: Use when ever possible specialized functions like \code{year}. These benefit from a
#' specialized implementation.
#'
-#' @param y Column to compute on
-#' @param x date format specification
+#' @param y Column to compute on.
+#' @param x date format specification.
#'
#' @family datetime_funcs
#' @rdname date_format
@@ -2320,8 +2318,8 @@ setMethod("date_format", signature(y = "Column", x = "character"),
#'
#' Assumes given timestamp is UTC and converts to given timezone.
#'
-#' @param y Column to compute on
-#' @param x time zone to use
+#' @param y Column to compute on.
+#' @param x time zone to use.
#'
#' @family datetime_funcs
#' @rdname from_utc_timestamp
@@ -2370,8 +2368,8 @@ setMethod("instr", signature(y = "Column", x = "character"),
#' Day of the week parameter is case insensitive, and accepts first three or two characters:
#' "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
#'
-#' @param y Column to compute on
-#' @param x Day of the week string
+#' @param y Column to compute on.
+#' @param x Day of the week string.
#'
#' @family datetime_funcs
#' @rdname next_day
@@ -2637,6 +2635,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri
#' Parses the expression string into the column that it represents, similar to
#' SparkDataFrame.selectExpr
#'
+#' @param x an expression character object to be parsed.
#' @family normal_funcs
#' @rdname expr
#' @aliases expr,character-method
@@ -2654,6 +2653,9 @@ setMethod("expr", signature(x = "character"),
#'
#' Formats the arguments in printf-style and returns the result as a string column.
#'
+#' @param format a character object of format strings.
+#' @param x a Column.
+#' @param ... additional Column(s).
#' @family string_funcs
#' @rdname format_string
#' @name format_string
@@ -2676,6 +2678,11 @@ setMethod("format_string", signature(format = "character", x = "Column"),
#' representing the timestamp of that moment in the current system time zone in the given
#' format.
#'
+#' @param x a Column of unix timestamp.
+#' @param format the target format. See
+#' \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#' Customizing Formats} for available options.
+#' @param ... further arguments to be passed to or from other methods.
#' @family datetime_funcs
#' @rdname from_unixtime
#' @name from_unixtime
@@ -2702,19 +2709,21 @@ setMethod("from_unixtime", signature(x = "Column"),
#' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
#' the order of months are not supported.
#'
-#' The time column must be of TimestampType.
-#'
-#' Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid
-#' interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
-#' If the `slideDuration` is not provided, the windows will be tumbling windows.
-#'
-#' The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start
-#' window intervals. For example, in order to have hourly tumbling windows that start 15 minutes
-#' past the hour, e.g. 12:15-13:15, 13:15-14:15... provide `startTime` as `15 minutes`.
-#'
-#' The output column will be a struct called 'window' by default with the nested columns 'start'
-#' and 'end'.
-#'
+#' @param x a time Column. Must be of TimestampType.
+#' @param windowDuration a string specifying the width of the window, e.g. '1 second',
+#' '1 day 12 hours', '2 minutes'. Valid interval strings are 'week',
+#' 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
+#' @param slideDuration a string specifying the sliding interval of the window. Same format as
+#' \code{windowDuration}. A new window will be generated every
+#' \code{slideDuration}. Must be less than or equal to
+#' the \code{windowDuration}.
+#' @param startTime the offset with respect to 1970-01-01 00:00:00 UTC with which to start
+#' window intervals. For example, in order to have hourly tumbling windows
+#' that start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide
+#' \code{startTime} as \code{"15 minutes"}.
+#' @param ... further arguments to be passed to or from other methods.
+#' @return An output column of struct called 'window' by default with the nested columns 'start'
+#' and 'end'.
#' @family datetime_funcs
#' @rdname window
#' @name window
@@ -2766,6 +2775,10 @@ setMethod("window", signature(x = "Column"),
#' NOTE: The position is not zero based, but 1 based index, returns 0 if substr
#' could not be found in str.
#'
+#' @param substr a character string to be matched.
+#' @param str a Column where matches are sought for each entry.
+#' @param pos start position of search.
+#' @param ... further arguments to be passed to or from other methods.
#' @family string_funcs
#' @rdname locate
#' @aliases locate,character,Column-method
@@ -2785,6 +2798,9 @@ setMethod("locate", signature(substr = "character", str = "Column"),
#'
#' Left-pad the string column with
#'
+#' @param x the string Column to be left-padded.
+#' @param len maximum length of each output result.
+#' @param pad a character string to be padded with.
#' @family string_funcs
#' @rdname lpad
#' @aliases lpad,Column,numeric,character-method
@@ -2804,6 +2820,7 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
#'
#' Generate a random column with i.i.d. samples from U[0.0, 1.0].
#'
+#' @param seed a random seed. Can be missing.
#' @family normal_funcs
#' @rdname rand
#' @name rand
@@ -2832,6 +2849,7 @@ setMethod("rand", signature(seed = "numeric"),
#'
#' Generate a column with i.i.d. samples from the standard normal distribution.
#'
+#' @param seed a random seed. Can be missing.
#' @family normal_funcs
#' @rdname randn
#' @name randn
@@ -2860,6 +2878,9 @@ setMethod("randn", signature(seed = "numeric"),
#'
#' Extract a specific(idx) group identified by a java regex, from the specified string column.
#'
+#' @param x a string Column.
+#' @param pattern a regular expression.
+#' @param idx a group index.
#' @family string_funcs
#' @rdname regexp_extract
#' @name regexp_extract
@@ -2880,6 +2901,9 @@ setMethod("regexp_extract",
#'
#' Replace all substrings of the specified string value that match regexp with rep.
#'
+#' @param x a string Column.
+#' @param pattern a regular expression.
+#' @param replacement a character string that a matched \code{pattern} is replaced with.
#' @family string_funcs
#' @rdname regexp_replace
#' @name regexp_replace
@@ -2900,6 +2924,9 @@ setMethod("regexp_replace",
#'
#' Right-padded with pad to a length of len.
#'
+#' @param x the string Column to be right-padded.
+#' @param len maximum length of each output result.
+#' @param pad a character string to be padded with.
#' @family string_funcs
#' @rdname rpad
#' @name rpad
@@ -2922,6 +2949,11 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
#' returned. If count is negative, every to the right of the final delimiter (counting from the
#' right) is returned. substring_index performs a case-sensitive match when searching for delim.
#'
+#' @param x a Column.
+#' @param delim a delimiter string.
+#' @param count number of occurrences of \code{delim} before the substring is returned.
+#' A positive number means counting from the left, while negative means
+#' counting from the right.
#' @family string_funcs
#' @rdname substring_index
#' @aliases substring_index,Column,character,numeric-method
@@ -2949,6 +2981,11 @@ setMethod("substring_index",
#' The translate will happen when any character in the string matching with the character
#' in the matchingString.
#'
+#' @param x a string Column.
+#' @param matchingString a source string where each character will be translated.
+#' @param replaceString a target string where each \code{matchingString} character will
+#' be replaced by the character in \code{replaceString}
+#' at the same location, if any.
#' @family string_funcs
#' @rdname translate
#' @name translate
@@ -2997,6 +3034,10 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
column(jc)
})
+#' @param x a Column of date, in string, date or timestamp type.
+#' @param format the target format. See
+#' \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#' Customizing Formats} for available options.
#' @rdname unix_timestamp
#' @name unix_timestamp
#' @aliases unix_timestamp,Column,character-method
@@ -3012,6 +3053,8 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
#' Evaluates a list of conditions and returns one of multiple possible result expressions.
#' For unmatched expressions null is returned.
#'
+#' @param condition the condition to test on. Must be a Column expression.
+#' @param value result expression.
#' @family normal_funcs
#' @rdname when
#' @name when
@@ -3033,6 +3076,9 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
#' Evaluates a list of conditions and returns \code{yes} if the conditions are satisfied.
#' Otherwise \code{no} is returned for unmatched conditions.
#'
+#' @param test a Column expression that describes the condition.
+#' @param yes return values for \code{TRUE} elements of test.
+#' @param no return values for \code{FALSE} elements of test.
#' @family normal_funcs
#' @rdname ifelse
#' @name ifelse
@@ -3074,10 +3120,14 @@ setMethod("ifelse",
#' @family window_funcs
#' @aliases cume_dist,missing-method
#' @export
-#' @examples \dontrun{cume_dist()}
+#' @examples \dontrun{
+#' df <- createDataFrame(iris)
+#' ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length")
+#' out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species)
+#' }
#' @note cume_dist since 1.6.0
setMethod("cume_dist",
- signature(x = "missing"),
+ signature("missing"),
function() {
jc <- callJStatic("org.apache.spark.sql.functions", "cume_dist")
column(jc)
@@ -3101,7 +3151,7 @@ setMethod("cume_dist",
#' @examples \dontrun{dense_rank()}
#' @note dense_rank since 1.6.0
setMethod("dense_rank",
- signature(x = "missing"),
+ signature("missing"),
function() {
jc <- callJStatic("org.apache.spark.sql.functions", "dense_rank")
column(jc)
@@ -3115,6 +3165,11 @@ setMethod("dense_rank",
#'
#' This is equivalent to the LAG function in SQL.
#'
+#' @param x the column as a character string or a Column to compute on.
+#' @param offset the number of rows back from the current row from which to obtain a value.
+#' If not specified, the default is 1.
+#' @param defaultValue default to use when the offset row does not exist.
+#' @param ... further arguments to be passed to or from other methods.
#' @rdname lag
#' @name lag
#' @aliases lag,characterOrColumn-method
@@ -3143,7 +3198,7 @@ setMethod("lag",
#' an `offset` of one will return the next row at any given point in the window partition.
#'
#' This is equivalent to the LEAD function in SQL.
-#'
+#'
#' @param x Column to compute on
#' @param offset Number of rows to offset
#' @param defaultValue (Optional) default value to use
@@ -3211,7 +3266,7 @@ setMethod("ntile",
#' @examples \dontrun{percent_rank()}
#' @note percent_rank since 1.6.0
setMethod("percent_rank",
- signature(x = "missing"),
+ signature("missing"),
function() {
jc <- callJStatic("org.apache.spark.sql.functions", "percent_rank")
column(jc)
@@ -3243,6 +3298,8 @@ setMethod("rank",
})
# Expose rank() in the R base package
+#' @param x a numeric, complex, character or logical vector.
+#' @param ... additional argument(s) passed to the method.
#' @name rank
#' @rdname rank
#' @aliases rank,ANY-method
@@ -3267,7 +3324,7 @@ setMethod("rank",
#' @examples \dontrun{row_number()}
#' @note row_number since 1.6.0
setMethod("row_number",
- signature(x = "missing"),
+ signature("missing"),
function() {
jc <- callJStatic("org.apache.spark.sql.functions", "row_number")
column(jc)
@@ -3318,7 +3375,7 @@ setMethod("explode",
#' size
#'
#' Returns length of array or map.
-#'
+#'
#' @param x Column to compute on
#'
#' @rdname size
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 693aa31d3e..6610a25c8c 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -380,6 +380,9 @@ setGeneric("value", function(bcast) { standardGeneric("value") })
#################### SparkDataFrame Methods ########################
+#' @param x a SparkDataFrame or GroupedData.
+#' @param ... further arguments to be passed to or from other methods.
+#' @return A SparkDataFrame.
#' @rdname summarize
#' @export
setGeneric("agg", function (x, ...) { standardGeneric("agg") })
@@ -407,6 +410,8 @@ setGeneric("cache", function(x) { standardGeneric("cache") })
#' @export
setGeneric("collect", function(x, ...) { standardGeneric("collect") })
+#' @param do.NULL currently not used.
+#' @param prefix currently not used.
#' @rdname columns
#' @export
setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") })
@@ -427,15 +432,24 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
#' @export
setGeneric("columns", function(x) {standardGeneric("columns") })
-#' @rdname nrow
+#' @param x a GroupedData or Column.
+#' @rdname count
#' @export
setGeneric("count", function(x) { standardGeneric("count") })
#' @rdname cov
+#' @param x a Column object or a SparkDataFrame.
+#' @param ... additional argument(s). If `x` is a Column object, a Column object
+#' should be provided. If `x` is a SparkDataFrame, two column names should
+#' be provided.
#' @export
setGeneric("cov", function(x, ...) {standardGeneric("cov") })
#' @rdname corr
+#' @param x a Column object or a SparkDataFrame.
+#' @param ... additional argument(s). If `x` is a Column object, a Column object
+#' should be provided. If `x` is a SparkDataFrame, two column names should
+#' be provided.
#' @export
setGeneric("corr", function(x, ...) {standardGeneric("corr") })
@@ -462,10 +476,14 @@ setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") })
#' @export
setGeneric("dapplyCollect", function(x, func) { standardGeneric("dapplyCollect") })
+#' @param x a SparkDataFrame or GroupedData.
+#' @param ... additional argument(s) passed to the method.
#' @rdname gapply
#' @export
setGeneric("gapply", function(x, ...) { standardGeneric("gapply") })
+#' @param x a SparkDataFrame or GroupedData.
+#' @param ... additional argument(s) passed to the method.
#' @rdname gapplyCollect
#' @export
setGeneric("gapplyCollect", function(x, ...) { standardGeneric("gapplyCollect") })
@@ -667,8 +685,8 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr")
#' @export
setGeneric("showDF", function(x, ...) { standardGeneric("showDF") })
-# @rdname subset
-# @export
+#' @rdname subset
+#' @export
setGeneric("subset", function(x, ...) { standardGeneric("subset") })
#' @rdname summarize
@@ -735,6 +753,8 @@ setGeneric("between", function(x, bounds) { standardGeneric("between") })
setGeneric("cast", function(x, dataType) { standardGeneric("cast") })
#' @rdname columnfunctions
+#' @param x a Column object.
+#' @param ... additional argument(s).
#' @export
setGeneric("contains", function(x, ...) { standardGeneric("contains") })
@@ -830,6 +850,8 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain
#' @export
setGeneric("ascii", function(x) { standardGeneric("ascii") })
+#' @param x Column to compute on or a GroupedData object.
+#' @param ... additional argument(s) when `x` is a GroupedData object.
#' @rdname avg
#' @export
setGeneric("avg", function(x, ...) { standardGeneric("avg") })
@@ -886,9 +908,10 @@ setGeneric("crc32", function(x) { standardGeneric("crc32") })
#' @export
setGeneric("hash", function(x, ...) { standardGeneric("hash") })
+#' @param x empty. Should be used with no argument.
#' @rdname cume_dist
#' @export
-setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") })
+setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") })
#' @rdname datediff
#' @export
@@ -918,9 +941,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
#' @export
setGeneric("decode", function(x, charset) { standardGeneric("decode") })
+#' @param x empty. Should be used with no argument.
#' @rdname dense_rank
#' @export
-setGeneric("dense_rank", function(x) { standardGeneric("dense_rank") })
+setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank") })
#' @rdname encode
#' @export
@@ -1034,10 +1058,11 @@ setGeneric("md5", function(x) { standardGeneric("md5") })
#' @export
setGeneric("minute", function(x) { standardGeneric("minute") })
+#' @param x empty. Should be used with no argument.
#' @rdname monotonically_increasing_id
#' @export
setGeneric("monotonically_increasing_id",
- function(x) { standardGeneric("monotonically_increasing_id") })
+ function(x = "missing") { standardGeneric("monotonically_increasing_id") })
#' @rdname month
#' @export
@@ -1047,7 +1072,7 @@ setGeneric("month", function(x) { standardGeneric("month") })
#' @export
setGeneric("months_between", function(y, x) { standardGeneric("months_between") })
-#' @rdname nrow
+#' @rdname count
#' @export
setGeneric("n", function(x) { standardGeneric("n") })
@@ -1071,9 +1096,10 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") })
#' @export
setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
+#' @param x empty. Should be used with no argument.
#' @rdname percent_rank
#' @export
-setGeneric("percent_rank", function(x) { standardGeneric("percent_rank") })
+setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") })
#' @rdname pmod
#' @export
@@ -1114,11 +1140,12 @@ setGeneric("reverse", function(x) { standardGeneric("reverse") })
#' @rdname rint
#' @export
-setGeneric("rint", function(x, ...) { standardGeneric("rint") })
+setGeneric("rint", function(x) { standardGeneric("rint") })
+#' @param x empty. Should be used with no argument.
#' @rdname row_number
#' @export
-setGeneric("row_number", function(x) { standardGeneric("row_number") })
+setGeneric("row_number", function(x = "missing") { standardGeneric("row_number") })
#' @rdname rpad
#' @export
@@ -1176,9 +1203,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array")
#' @export
setGeneric("soundex", function(x) { standardGeneric("soundex") })
+#' @param x empty. Should be used with no argument.
#' @rdname spark_partition_id
#' @export
-setGeneric("spark_partition_id", function(x) { standardGeneric("spark_partition_id") })
+setGeneric("spark_partition_id", function(x = "missing") { standardGeneric("spark_partition_id") })
#' @rdname sd
#' @export
@@ -1276,10 +1304,16 @@ setGeneric("year", function(x) { standardGeneric("year") })
#' @export
setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.glm") })
+#' @param x,y For \code{glm}: logical values indicating whether the response vector
+#' and model matrix used in the fitting process should be returned as
+#' components of the returned value.
+#' @inheritParams stats::glm
#' @rdname glm
#' @export
setGeneric("glm")
+#' @param object a fitted ML model object.
+#' @param ... additional argument(s) passed to the method.
#' @rdname predict
#' @export
setGeneric("predict", function(object, ...) { standardGeneric("predict") })
@@ -1302,7 +1336,7 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
#' @rdname spark.survreg
#' @export
-setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })
+setGeneric("spark.survreg", function(data, formula) { standardGeneric("spark.survreg") })
#' @rdname spark.lda
#' @param ... Additional parameters to tune LDA.
@@ -1328,7 +1362,9 @@ setGeneric("spark.gaussianMixture",
standardGeneric("spark.gaussianMixture")
})
-#' write.ml
+#' @param object a fitted ML model object.
+#' @param path the directory where the model is saved.
+#' @param ... additional argument(s) passed to the method.
#' @rdname write.ml
#' @export
setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 85348ae76b..3c85ada91a 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -59,8 +59,7 @@ setMethod("show", "GroupedData",
#' Count the number of rows for each group.
#' The resulting SparkDataFrame will also contain the grouping columns.
#'
-#' @param x a GroupedData
-#' @return a SparkDataFrame
+#' @return A SparkDataFrame.
#' @rdname count
#' @aliases count,GroupedData-method
#' @export
@@ -83,8 +82,6 @@ setMethod("count",
#' df2 <- agg(df, <column> = <aggFunction>)
#' df2 <- agg(df, newColName = aggFunction(column))
#'
-#' @param x a GroupedData
-#' @return a SparkDataFrame
#' @rdname summarize
#' @aliases agg,GroupedData-method
#' @name agg
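A minimal sketch of the count()/agg() behaviour documented above (assuming SparkR is attached, a session is running and `df <- createDataFrame(iris)`; iris column names have `.` replaced by `_`):

```r
grouped <- groupBy(df, "Species")
head(count(grouped))                       # one row per group with a "count" column
head(agg(grouped,
         n_rows   = n(df$Species),
         mean_len = avg(df$Sepal_Length)))
```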
@@ -201,7 +198,6 @@ createMethods()
#' gapply
#'
-#' @param x A GroupedData
#' @rdname gapply
#' @aliases gapply,GroupedData-method
#' @name gapply
@@ -216,7 +212,6 @@ setMethod("gapply",
#' gapplyCollect
#'
-#' @param x A GroupedData
#' @rdname gapplyCollect
#' @aliases gapplyCollect,GroupedData-method
#' @name gapplyCollect
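A minimal sketch of gapply()/gapplyCollect() on the same `df`; the output schema and the summary computed per group are illustrative:

```r
schema <- structType(structField("Species", "string"),
                     structField("avg_width", "double"))
result <- gapply(df, "Species",
                 function(key, x) {
                   data.frame(key, mean(x$Sepal_Width), stringsAsFactors = FALSE)
                 }, schema)
head(result)
# gapplyCollect() runs the same function but returns a local R data.frame, so no schema is needed
gapplyCollect(df, "Species", function(key, x) {
  data.frame(key, mean(x$Sepal_Width), stringsAsFactors = FALSE)
})
```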
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 36f38fc73a..9a53c80aec 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -113,17 +113,18 @@ NULL
#' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
#' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
#'
-#' @param data SparkDataFrame for training.
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a SparkDataFrame for training.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param family A description of the error distribution and link function to be used in the model.
+#' @param family a description of the error distribution and link function to be used in the model.
#' This can be a character string naming a family function, a family function or
#' the result of a call to a family function. Refer R family at
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
-#' @param tol Positive convergence tolerance of iterations.
-#' @param maxIter Integer giving the maximal number of IRLS iterations.
-#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance
+#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance
#' weights as 1.0.
+#' @param tol positive convergence tolerance of iterations.
+#' @param maxIter integer giving the maximal number of IRLS iterations.
+#' @param ... additional arguments passed to the method.
#' @aliases spark.glm,SparkDataFrame,formula-method
#' @return \code{spark.glm} returns a fitted generalized linear model
#' @rdname spark.glm
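A minimal sketch of spark.glm() with the arguments documented above (assuming SparkR is attached, a session is running and `df <- createDataFrame(iris)`):

```r
model <- spark.glm(df, Sepal_Length ~ Sepal_Width + Species,
                   family = "gaussian", tol = 1e-6, maxIter = 25)
summary(model)                   # coefficients, deviance, AIC, IRLS iterations
pred <- predict(model, df)       # adds a "prediction" column
head(select(pred, "Sepal_Length", "prediction"))
```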
@@ -178,17 +179,17 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
#' Generalized Linear Models (R-compliant)
#'
#' Fits a generalized linear model, similarly to R's glm().
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param data SparkDataFrame for training.
-#' @param family A description of the error distribution and link function to be used in the model.
+#' @param data a SparkDataFrame or R's glm data for training.
+#' @param family a description of the error distribution and link function to be used in the model.
#' This can be a character string naming a family function, a family function or
#' the result of a call to a family function. Refer R family at
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
-#' @param epsilon Positive convergence tolerance of iterations.
-#' @param maxit Integer giving the maximal number of IRLS iterations.
-#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance
+#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance
#' weights as 1.0.
+#' @param epsilon positive convergence tolerance of iterations.
+#' @param maxit integer giving the maximal number of IRLS iterations.
#' @return \code{glm} returns a fitted generalized linear model.
#' @rdname glm
#' @export
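And the equivalent call through the R-compliant glm() interface, which dispatches to SparkR when `data` is a SparkDataFrame (`epsilon`/`maxit` mirror `tol`/`maxIter` above; same illustrative `df`):

```r
model <- glm(Sepal_Length ~ Sepal_Width + Species, family = gaussian, data = df,
             epsilon = 1e-6, maxit = 25)
summary(model)
```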
@@ -209,7 +210,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat
# Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
-#' @param object A fitted generalized linear model
+#' @param object a fitted generalized linear model.
#' @return \code{summary} returns a summary object of the fitted model, a list of components
#' including at least the coefficients, null/residual deviance, null/residual degrees
#' of freedom, AIC and number of iterations IRLS takes.
@@ -250,7 +251,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
# Prints the summary of GeneralizedLinearRegressionModel
#' @rdname spark.glm
-#' @param x Summary object of fitted generalized linear model returned by \code{summary} function
+#' @param x summary object of fitted generalized linear model returned by \code{summary} function.
#' @export
#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
@@ -282,7 +283,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
# Makes predictions from a generalized linear model produced by glm() or spark.glm(),
# similarly to R's predict().
-#' @param newData SparkDataFrame for testing
+#' @param newData a SparkDataFrame for testing.
#' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named
#' "prediction"
#' @rdname spark.glm
@@ -296,7 +297,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
# Makes predictions from a naive Bayes model or a model produced by spark.naiveBayes(),
# similarly to R package e1071's predict.
-#' @param newData A SparkDataFrame for testing
+#' @param newData a SparkDataFrame for testing.
#' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named
#' "prediction"
#' @rdname spark.naiveBayes
@@ -309,9 +310,9 @@ setMethod("predict", signature(object = "NaiveBayesModel"),
# Returns the summary of a naive Bayes model produced by \code{spark.naiveBayes}
-#' @param object A naive Bayes model fitted by \code{spark.naiveBayes}
+#' @param object a naive Bayes model fitted by \code{spark.naiveBayes}.
#' @return \code{summary} returns a list containing \code{apriori}, the label distribution, and
-#' \code{tables}, conditional probabilities given the target label
+#' \code{tables}, conditional probabilities given the target label.
#' @rdname spark.naiveBayes
#' @export
#' @note summary(NaiveBayesModel) since 2.0.0
@@ -491,7 +492,6 @@ setMethod("predict", signature(object = "IsotonicRegressionModel"),
# Get the summary of an IsotonicRegressionModel model
-#' @param object a fitted IsotonicRegressionModel
#' @param ... Other optional arguments to summary of an IsotonicRegressionModel
#' @return \code{summary} returns the model's boundaries and prediction as lists
#' @rdname spark.isoreg
@@ -512,14 +512,15 @@ setMethod("summary", signature(object = "IsotonicRegressionModel"),
#' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
#' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
#'
-#' @param data SparkDataFrame for training
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a SparkDataFrame for training.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', '.', ':', '+', and '-'.
#' Note that the response variable of formula is empty in spark.kmeans.
-#' @param k Number of centers
-#' @param maxIter Maximum iteration number
-#' @param initMode The initialization algorithm choosen to fit the model
-#' @return \code{spark.kmeans} returns a fitted k-means model
+#' @param k number of centers.
+#' @param maxIter maximum iteration number.
+#' @param initMode the initialization algorithm chosen to fit the model.
+#' @param ... additional argument(s) passed to the method.
+#' @return \code{spark.kmeans} returns a fitted k-means model.
#' @rdname spark.kmeans
#' @aliases spark.kmeans,SparkDataFrame,formula-method
#' @name spark.kmeans
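A minimal sketch of spark.kmeans() together with the summary()/fitted()/predict() methods touched below (same assumptions: SparkR attached, session running, `df <- createDataFrame(iris)`; the response side of the formula stays empty):

```r
model <- spark.kmeans(df, ~ Sepal_Length + Sepal_Width, k = 3,
                      maxIter = 20, initMode = "k-means||")
summary(model)                            # coefficients, size and cluster
head(fitted(model, method = "centers"))   # fitted values from the training data
head(predict(model, df))                  # cluster assignments for new data
```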
@@ -560,8 +561,11 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
#' Get fitted result from a k-means model, similarly to R's fitted().
#' Note: A saved-loaded model does not support this method.
#'
-#' @param object A fitted k-means model
-#' @return \code{fitted} returns a SparkDataFrame containing fitted values
+#' @param object a fitted k-means model.
+#' @param method type of fitted results, \code{"centers"} for cluster centers
+#' or \code{"classes"} for assigned classes.
+#' @param ... additional argument(s) passed to the method.
+#' @return \code{fitted} returns a SparkDataFrame containing fitted values.
#' @rdname fitted
#' @export
#' @examples
@@ -585,8 +589,8 @@ setMethod("fitted", signature(object = "KMeansModel"),
# Get the summary of a k-means model
-#' @param object A fitted k-means model
-#' @return \code{summary} returns the model's coefficients, size and cluster
+#' @param object a fitted k-means model.
+#' @return \code{summary} returns the model's coefficients, size and cluster.
#' @rdname spark.kmeans
#' @export
#' @note summary(KMeansModel) since 2.0.0
@@ -612,7 +616,8 @@ setMethod("summary", signature(object = "KMeansModel"),
# Predicted values based on a k-means model
-#' @return \code{predict} returns the predicted values based on a k-means model
+#' @param newData a SparkDataFrame for testing.
+#' @return \code{predict} returns the predicted values based on a k-means model.
#' @rdname spark.kmeans
#' @export
#' @note predict(KMeansModel) since 2.0.0
@@ -628,11 +633,12 @@ setMethod("predict", signature(object = "KMeansModel"),
#' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
#' Only categorical data is supported.
#'
-#' @param data A \code{SparkDataFrame} of observations and labels for model fitting
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a \code{SparkDataFrame} of observations and labels for model fitting.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param smoothing Smoothing parameter
-#' @return \code{spark.naiveBayes} returns a fitted naive Bayes model
+#' @param smoothing smoothing parameter.
+#' @param ... additional argument(s) passed to the method. Currently only \code{smoothing}.
+#' @return \code{spark.naiveBayes} returns a fitted naive Bayes model.
#' @rdname spark.naiveBayes
#' @aliases spark.naiveBayes,SparkDataFrame,formula-method
#' @name spark.naiveBayes
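A minimal sketch of spark.naiveBayes() on categorical data (the base-R Titanic data set is used purely for illustration):

```r
titanic <- as.data.frame(Titanic)
df <- createDataFrame(titanic[titanic$Freq > 0, -5])    # drop the Freq column
model <- spark.naiveBayes(df, Survived ~ Class + Sex + Age, smoothing = 1.0)
summary(model)             # apriori label distribution and conditional probability tables
head(predict(model, df))   # adds a "prediction" column
```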
@@ -668,8 +674,8 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form
# Saves the Bernoulli naive Bayes model to the input path.
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved.
+#' @param overwrite whether to overwrite if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
#'
#' @rdname spark.naiveBayes
@@ -687,10 +693,9 @@ setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"),
# Saves the AFT survival regression model to the input path.
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved.
+#' @param overwrite whether to overwrite if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
-#'
#' @rdname spark.survreg
#' @export
#' @note write.ml(AFTSurvivalRegressionModel, character) since 2.0.0
@@ -706,8 +711,8 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
# Saves the generalized linear model to the input path.
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved.
+#' @param overwrite whether to overwrite if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
#'
#' @rdname spark.glm
@@ -724,8 +729,8 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
# Save fitted MLlib model to the input path
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved.
+#' @param overwrite whether to overwrite if the output path already exists. Default is FALSE
#' which means throw exception if the output path exists.
#'
#' @rdname spark.kmeans
@@ -780,8 +785,8 @@ setMethod("write.ml", signature(object = "GaussianMixtureModel", path = "charact
#' Load a fitted MLlib model from the input path.
#'
-#' @param path Path of the model to read.
-#' @return a fitted MLlib model
+#' @param path path of the model to read.
+#' @return A fitted MLlib model.
#' @rdname read.ml
#' @name read.ml
#' @export
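A minimal sketch of the save/load round trip documented by the write.ml()/read.ml() hunks above (the path is a throwaway temp directory, purely illustrative):

```r
df <- createDataFrame(iris)
model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
path <- tempfile(pattern = "spark-glm")
write.ml(model, path, overwrite = TRUE)
sameModel <- read.ml(path)
summary(sameModel)
```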
@@ -823,11 +828,11 @@ read.ml <- function(path) {
#' \code{predict} to make predictions on new data, and \code{write.ml}/\code{read.ml} to
#' save/load fitted models.
#'
-#' @param data A SparkDataFrame for training
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a SparkDataFrame for training.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', ':', '+', and '-'.
-#' Note that operator '.' is not supported currently
-#' @return \code{spark.survreg} returns a fitted AFT survival regression model
+#' Note that operator '.' is not supported currently.
+#' @return \code{spark.survreg} returns a fitted AFT survival regression model.
#' @rdname spark.survreg
#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/}
#' @export
@@ -851,7 +856,7 @@ read.ml <- function(path) {
#' }
#' @note spark.survreg since 2.0.0
setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula"),
- function(data, formula, ...) {
+ function(data, formula) {
formula <- paste(deparse(formula), collapse = "")
jobj <- callJStatic("org.apache.spark.ml.r.AFTSurvivalRegressionWrapper",
"fit", formula, data@sdf)
@@ -927,14 +932,14 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"),
# Returns a summary of the AFT survival regression model produced by spark.survreg,
# similarly to R's summary().
-#' @param object A fitted AFT survival regression model
+#' @param object a fitted AFT survival regression model.
#' @return \code{summary} returns a list containing the model's coefficients,
#' intercept and log(scale)
#' @rdname spark.survreg
#' @export
#' @note summary(AFTSurvivalRegressionModel) since 2.0.0
setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
- function(object, ...) {
+ function(object) {
jobj <- object@jobj
features <- callJMethod(jobj, "rFeatures")
coefficients <- callJMethod(jobj, "rCoefficients")
@@ -947,9 +952,9 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
# Makes predictions from an AFT survival regression model or a model produced by
# spark.survreg, similarly to R package survival's predict.
-#' @param newData A SparkDataFrame for testing
+#' @param newData a SparkDataFrame for testing.
#' @return \code{predict} returns a SparkDataFrame containing predicted values
-#' on the original scale of the data (mean predicted value at scale = 1.0)
+#' on the original scale of the data (mean predicted value at scale = 1.0).
#' @rdname spark.survreg
#' @export
#' @note predict(AFTSurvivalRegressionModel) since 2.0.0
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index b429f5de13..cb5bdb9017 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -92,8 +92,9 @@ print.structType <- function(x, ...) {
#'
#' Create a structField object that contains the metadata for a single field in a schema.
#'
-#' @param x The name of the field
-#' @return a structField object
+#' @param x the name of the field.
+#' @param ... additional argument(s) passed to the method.
+#' @return A structField object.
#' @rdname structField
#' @export
#' @examples
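A minimal sketch of structField()/structType(), whose arguments are documented above (field names and types are illustrative):

```r
field_name <- structField("name", "string", TRUE)
field_age  <- structField("age", "integer", TRUE)
schema     <- structType(field_name, field_age)
# the resulting schema can be passed to createDataFrame() or gapply()
```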
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index f8bdee739e..85815af1f3 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -320,14 +320,15 @@ sparkRHive.init <- function(jsc = NULL) {
#' For details on how to initialize and use SparkR, refer to SparkR programming guide at
#' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}.
#'
-#' @param master The Spark master URL
-#' @param appName Application name to register with cluster manager
-#' @param sparkHome Spark Home directory
-#' @param sparkConfig Named list of Spark configuration to set on worker nodes
-#' @param sparkJars Character vector of jar files to pass to the worker nodes
-#' @param sparkPackages Character vector of packages from spark-packages.org
-#' @param enableHiveSupport Enable support for Hive, fallback if not built with Hive support; once
+#' @param master the Spark master URL.
+#' @param appName application name to register with cluster manager.
+#' @param sparkHome Spark Home directory.
+#' @param sparkConfig named list of Spark configuration to set on worker nodes.
+#' @param sparkJars character vector of jar files to pass to the worker nodes.
+#' @param sparkPackages character vector of packages from spark-packages.org
+#' @param enableHiveSupport enable support for Hive, fallback if not built with Hive support; once
#' set, this cannot be turned off on an existing session
+#' @param ... named Spark properties passed to the method.
#' @export
#' @examples
#'\dontrun{
@@ -413,9 +414,9 @@ sparkR.session <- function(
#' Assigns a group ID to all the jobs started by this thread until the group ID is set to a
#' different value or cleared.
#'
-#' @param groupid the ID to be assigned to job groups
-#' @param description description for the job group ID
-#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation
+#' @param groupId the ID to be assigned to job groups.
+#' @param description description for the job group ID.
+#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation.
#' @rdname setJobGroup
#' @name setJobGroup
#' @examples
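A minimal sketch of sparkR.session() with the parameters renamed earlier in this file (all values are placeholders):

```r
sparkR.session(master = "local[2]",
               appName = "SparkR-example",
               sparkConfig = list(spark.driver.memory = "1g"),
               enableHiveSupport = FALSE)
```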
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 2b4ce195cb..8ea24d8172 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -25,6 +25,7 @@ setOldClass("jobj")
#' table. The number of distinct values for each column should be less than 1e4. At most 1e6
#' non-zero pair frequencies will be returned.
#'
+#' @param x a SparkDataFrame.
#' @param col1 name of the first column. Distinct items will make the first item of each row.
#' @param col2 name of the second column. Distinct items will make the column names of the output.
#' @return a local R data.frame representing the contingency table. The first column of each row
@@ -53,10 +54,9 @@ setMethod("crosstab",
#' Calculate the sample covariance of two numerical columns of a SparkDataFrame.
#'
-#' @param x A SparkDataFrame
-#' @param col1 the name of the first column
-#' @param col2 the name of the second column
-#' @return the covariance of the two columns.
+#' @param colName1 the name of the first column.
+#' @param colName2 the name of the second column.
+#' @return The covariance of the two columns.
#'
#' @rdname cov
#' @name cov
@@ -71,19 +71,18 @@ setMethod("crosstab",
#' @note cov since 1.6.0
setMethod("cov",
signature(x = "SparkDataFrame"),
- function(x, col1, col2) {
- stopifnot(class(col1) == "character" && class(col2) == "character")
+ function(x, colName1, colName2) {
+ stopifnot(class(colName1) == "character" && class(colName2) == "character")
statFunctions <- callJMethod(x@sdf, "stat")
- callJMethod(statFunctions, "cov", col1, col2)
+ callJMethod(statFunctions, "cov", colName1, colName2)
})
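A minimal sketch of cov() (and crosstab(), whose `x` parameter is documented above) with the renamed colName arguments, assuming SparkR is attached and a session is running:

```r
carsDF <- createDataFrame(mtcars)
cov(carsDF, "mpg", "hp")          # sample covariance, returned as a plain numeric
crosstab(carsDF, "cyl", "gear")   # local R data.frame contingency table
```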
#' Calculates the correlation of two columns of a SparkDataFrame.
#' Currently only supports the Pearson Correlation Coefficient.
#' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics.
#'
-#' @param x A SparkDataFrame
-#' @param col1 the name of the first column
-#' @param col2 the name of the second column
+#' @param colName1 the name of the first column.
+#' @param colName2 the name of the second column.
#' @param method Optional. A character specifying the method for calculating the correlation.
#' only "pearson" is allowed now.
#' @return The Pearson Correlation Coefficient as a Double.
@@ -102,10 +101,10 @@ setMethod("cov",
#' @note corr since 1.6.0
setMethod("corr",
signature(x = "SparkDataFrame"),
- function(x, col1, col2, method = "pearson") {
- stopifnot(class(col1) == "character" && class(col2) == "character")
+ function(x, colName1, colName2, method = "pearson") {
+ stopifnot(class(colName1) == "character" && class(colName2) == "character")
statFunctions <- callJMethod(x@sdf, "stat")
- callJMethod(statFunctions, "corr", col1, col2, method)
+ callJMethod(statFunctions, "corr", colName1, colName2, method)
})
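And the matching corr() call after the same rename; only the Pearson coefficient is supported (same illustrative data):

```r
carsDF <- createDataFrame(mtcars)
corr(carsDF, "mpg", "hp", method = "pearson")   # Pearson correlation as a numeric value
```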