-rw-r--r--  R/pkg/NAMESPACE       6
-rw-r--r--  R/pkg/R/DataFrame.R  71
-rw-r--r--  R/pkg/R/RDD.R        10
-rw-r--r--  R/pkg/R/SQLContext.R 30
-rw-r--r--  R/pkg/R/WindowSpec.R 23
-rw-r--r--  R/pkg/R/column.R      2
-rw-r--r--  R/pkg/R/functions.R  36
-rw-r--r--  R/pkg/R/generics.R   15
-rw-r--r--  R/pkg/R/group.R       1
-rw-r--r--  R/pkg/R/mllib.R      19
-rw-r--r--  R/pkg/R/pairRDD.R     6
-rw-r--r--  R/pkg/R/stats.R      14
12 files changed, 119 insertions(+), 114 deletions(-)
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index e1b87b28d3..709057675e 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -1,5 +1,9 @@
# Imports from base R
-importFrom(methods, setGeneric, setMethod, setOldClass)
+# Do not include stats:: "rpois", "runif" - causes error at runtime
+importFrom("methods", "setGeneric", "setMethod", "setOldClass")
+importFrom("methods", "is", "new", "signature", "show")
+importFrom("stats", "gaussian", "setNames")
+importFrom("utils", "download.file", "packageVersion", "untar")
# Disable native libraries till we figure out how to package it
# See SPARKR-7839
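
As background for the NAMESPACE comment above, the pattern this patch relies on is calling the random-number helpers fully qualified instead of importing them. A minimal base-R sketch (not part of the patch):

  # Fully qualified calls work without importFrom("stats", ...) in NAMESPACE,
  # avoiding the runtime error mentioned in the comment above.
  stats::runif(3)              # three uniform draws
  stats::rpois(1, lambda = 2)  # one Poisson draw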
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 540dc3122d..52a6628ad7 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -150,7 +150,7 @@ setMethod("explain",
#' isLocal
#'
-#' Returns True if the `collect` and `take` methods can be run locally
+#' Returns True if the \code{collect} and \code{take} methods can be run locally
#' (without any Spark executors).
#'
#' @param x A SparkDataFrame
@@ -182,7 +182,7 @@ setMethod("isLocal",
#' @param numRows the number of rows to print. Defaults to 20.
#' @param truncate whether to truncate long strings. If \code{TRUE}, strings more than
#' 20 characters will be truncated. However, if set greater than zero,
-#' truncates strings longer than `truncate` characters and all cells
+#' truncates strings longer than \code{truncate} characters and all cells
#' will be aligned right.
#' @param ... further arguments to be passed to or from other methods.
#' @family SparkDataFrame functions
@@ -642,10 +642,10 @@ setMethod("unpersist",
#' The following options for repartition are possible:
#' \itemize{
#' \item{1.} {Return a new SparkDataFrame partitioned by
-#' the given columns into `numPartitions`.}
-#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#' the given columns into \code{numPartitions}.}
+#' \item{2.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.}
#' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
-#' using `spark.sql.shuffle.partitions` as number of partitions.}
+#' using \code{spark.sql.shuffle.partitions} as number of partitions.}
#'}
#' @param x a SparkDataFrame.
#' @param numPartitions the number of partitions to use.
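
A rough usage sketch of the three repartition modes listed above (the column name is illustrative):

  library(SparkR)
  sparkR.session()
  df <- createDataFrame(mtcars)
  df_a <- repartition(df, numPartitions = 4L)                # exactly 4 partitions
  df_b <- repartition(df, col = df$cyl)                      # by column, spark.sql.shuffle.partitions partitions
  df_c <- repartition(df, numPartitions = 4L, col = df$cyl)  # by column into 4 partitions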
@@ -1132,9 +1132,8 @@ setMethod("take",
#' Head
#'
-#' Return the first NUM rows of a SparkDataFrame as a R data.frame. If NUM is NULL,
-#' then head() returns the first 6 rows in keeping with the current data.frame
-#' convention in R.
+#' Return the first \code{num} rows of a SparkDataFrame as an R data.frame. If \code{num} is not
+#' specified, then head() returns the first 6 rows as with an R data.frame.
#'
#' @param x a SparkDataFrame.
#' @param num the number of rows to return. Default is 6.
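
A quick sketch of the documented default (assuming an active SparkR session):

  df <- createDataFrame(faithful)
  head(df)      # first 6 rows, returned as a local R data.frame
  head(df, 3)   # first 3 rows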
@@ -1406,11 +1405,11 @@ setMethod("dapplyCollect",
#'
#' @param cols grouping columns.
#' @param func a function to be applied to each group partition specified by grouping
-#' column of the SparkDataFrame. The function `func` takes as argument
+#' column of the SparkDataFrame. The function \code{func} takes as argument
#' a key - grouping columns and a data frame - a local R data.frame.
-#' The output of `func` is a local R data.frame.
+#' The output of \code{func} is a local R data.frame.
#' @param schema the schema of the resulting SparkDataFrame after the function is applied.
-#' The schema must match to output of `func`. It has to be defined for each
+#' The schema must match to output of \code{func}. It has to be defined for each
#' output column with preferred output column name and corresponding data type.
#' @return A SparkDataFrame.
#' @family SparkDataFrame functions
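
A minimal gapply() sketch matching the parameters above; the column names and schema are illustrative only, and gapplyCollect() (documented in the next hunk) is the same minus the schema:

  df <- createDataFrame(data.frame(key = c("a", "a", "b"), value = c(1, 2, 10)))
  schema <- structType(structField("key", "string"),
                       structField("total", "double"))
  res <- gapply(df, "key",
                function(key, x) {
                  # one local R data.frame in, one local R data.frame out per group
                  data.frame(key, sum(x$value), stringsAsFactors = FALSE)
                },
                schema)
  head(res)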
@@ -1497,9 +1496,9 @@ setMethod("gapply",
#'
#' @param cols grouping columns.
#' @param func a function to be applied to each group partition specified by grouping
-#' column of the SparkDataFrame. The function `func` takes as argument
+#' column of the SparkDataFrame. The function \code{func} takes as argument
#' a key - grouping columns and a data frame - a local R data.frame.
-#' The output of `func` is a local R data.frame.
+#' The output of \code{func} is a local R data.frame.
#' @return A data.frame.
#' @family SparkDataFrame functions
#' @aliases gapplyCollect,SparkDataFrame-method
@@ -1657,7 +1656,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
getColumn(x, name)
})
-#' @param value a Column or NULL. If NULL, the specified Column is dropped.
+#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped.
#' @rdname select
#' @name $<-
#' @aliases $<-,SparkDataFrame-method
@@ -1747,7 +1746,7 @@ setMethod("[", signature(x = "SparkDataFrame"),
#' @family subsetting functions
#' @examples
#' \dontrun{
-#' # Columns can be selected using `[[` and `[`
+#' # Columns can be selected using [[ and [
#' df[[2]] == df[["age"]]
#' df[,2] == df[,"age"]
#' df[,c("name", "age")]
@@ -1792,7 +1791,7 @@ setMethod("subset", signature(x = "SparkDataFrame"),
#' select(df, df$name, df$age + 1)
#' select(df, c("col1", "col2"))
#' select(df, list(df$name, df$age + 1))
-#' # Similar to R data frames columns can also be selected using `$`
+#' # Similar to R data frames columns can also be selected using $
#' df[,df$age]
#' }
#' @note select(SparkDataFrame, character) since 1.4.0
@@ -2443,7 +2442,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
#' Return a new SparkDataFrame containing the union of rows
#'
#' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
-#' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL.
+#' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL.
#' Note that this does not remove duplicate rows across the two SparkDataFrames.
#'
#' @param x A SparkDataFrame
@@ -2486,7 +2485,7 @@ setMethod("unionAll",
#' Union two or more SparkDataFrames
#'
-#' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
+#' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL.
#' Note that this does not remove duplicate rows across the two SparkDataFrames.
#'
#' @param x a SparkDataFrame.
@@ -2519,7 +2518,7 @@ setMethod("rbind",
#' Intersect
#'
#' Return a new SparkDataFrame containing rows only in both this SparkDataFrame
-#' and another SparkDataFrame. This is equivalent to `INTERSECT` in SQL.
+#' and another SparkDataFrame. This is equivalent to \code{INTERSECT} in SQL.
#'
#' @param x A SparkDataFrame
#' @param y A SparkDataFrame
@@ -2547,7 +2546,7 @@ setMethod("intersect",
#' except
#'
#' Return a new SparkDataFrame containing rows in this SparkDataFrame
-#' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL.
+#' but not in another SparkDataFrame. This is equivalent to \code{EXCEPT} in SQL.
#'
#' @param x a SparkDataFrame.
#' @param y a SparkDataFrame.
@@ -2576,8 +2575,8 @@ setMethod("except",
#' Save the contents of SparkDataFrame to a data source.
#'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options (...).
+#' If \code{source} is not specified, the default data source configured by
#' spark.sql.sources.default will be used.
#'
#' Additionally, mode is used to specify the behavior of the save operation when data already
@@ -2613,7 +2612,7 @@ setMethod("except",
#' @note write.df since 1.4.0
setMethod("write.df",
signature(df = "SparkDataFrame", path = "character"),
- function(df, path, source = NULL, mode = "error", ...){
+ function(df, path, source = NULL, mode = "error", ...) {
if (is.null(source)) {
source <- getDefaultSqlSource()
}
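
For reference, a hedged write.df() usage sketch (not part of the patch); the output path is a placeholder and an active SparkR session is assumed:

  df <- createDataFrame(faithful)
  write.df(df, path = "/tmp/faithful_parquet", source = "parquet", mode = "overwrite")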
@@ -2635,14 +2634,14 @@ setMethod("write.df",
#' @note saveDF since 1.4.0
setMethod("saveDF",
signature(df = "SparkDataFrame", path = "character"),
- function(df, path, source = NULL, mode = "error", ...){
+ function(df, path, source = NULL, mode = "error", ...) {
write.df(df, path, source, mode, ...)
})
#' Save the contents of the SparkDataFrame to a data source as a table
#'
-#' The data source is specified by the `source` and a set of options (...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options (...).
+#' If \code{source} is not specified, the default data source configured by
#' spark.sql.sources.default will be used.
#'
#' Additionally, mode is used to specify the behavior of the save operation when
@@ -2675,7 +2674,7 @@ setMethod("saveDF",
#' @note saveAsTable since 1.4.0
setMethod("saveAsTable",
signature(df = "SparkDataFrame", tableName = "character"),
- function(df, tableName, source = NULL, mode="error", ...){
+ function(df, tableName, source = NULL, mode="error", ...) {
if (is.null(source)) {
source <- getDefaultSqlSource()
}
@@ -2752,11 +2751,11 @@ setMethod("summary",
#' @param how "any" or "all".
#' if "any", drop a row if it contains any nulls.
#' if "all", drop a row only if all its values are null.
-#' if minNonNulls is specified, how is ignored.
+#' if \code{minNonNulls} is specified, how is ignored.
#' @param minNonNulls if specified, drop rows that have less than
-#' minNonNulls non-null values.
+#' \code{minNonNulls} non-null values.
#' This overwrites the how parameter.
-#' @param cols optional list of column names to consider. In `fillna`,
+#' @param cols optional list of column names to consider. In \code{fillna},
#' columns specified in cols that do not have matching data
#' type are ignored. For example, if value is a character, and
#' subset contains a non-character column, then the non-character
@@ -2879,8 +2878,8 @@ setMethod("fillna",
#' in your system to accommodate the contents.
#'
#' @param x a SparkDataFrame.
-#' @param row.names NULL or a character vector giving the row names for the data frame.
-#' @param optional If `TRUE`, converting column names is optional.
+#' @param row.names \code{NULL} or a character vector giving the row names for the data frame.
+#' @param optional If \code{TRUE}, converting column names is optional.
#' @param ... additional arguments to pass to base::as.data.frame.
#' @return A data.frame.
#' @family SparkDataFrame functions
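
A small collection sketch for the parameters above; note that the entire SparkDataFrame is pulled to the driver:

  sdf <- createDataFrame(faithful)
  local_df <- as.data.frame(sdf)   # base R data.frame on the driver
  nrow(local_df)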
@@ -3058,7 +3057,7 @@ setMethod("str",
#' @note drop since 2.0.0
setMethod("drop",
signature(x = "SparkDataFrame"),
- function(x, col, ...) {
+ function(x, col) {
stopifnot(class(col) == "character" || class(col) == "Column")
if (class(col) == "Column") {
@@ -3218,8 +3217,8 @@ setMethod("histogram",
#' and to not change the existing data.
#' }
#'
-#' @param x s SparkDataFrame.
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`.
+#' @param x a SparkDataFrame.
+#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}.
#' @param tableName the name of the table in the external database.
#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default).
#' @param ... additional JDBC database connection properties.
@@ -3237,7 +3236,7 @@ setMethod("histogram",
#' @note write.jdbc since 2.0.0
setMethod("write.jdbc",
signature(x = "SparkDataFrame", url = "character", tableName = "character"),
- function(x, url, tableName, mode = "error", ...){
+ function(x, url, tableName, mode = "error", ...) {
jmode <- convertToJSaveMode(mode)
jprops <- varargsToJProperties(...)
write <- callJMethod(x@sdf, "write")
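
For reference, a hedged write.jdbc() usage sketch (not part of the patch); the URL, table name, and credentials are placeholders, and the matching JDBC driver must be on the Spark classpath:

  df <- createDataFrame(mtcars)
  write.jdbc(df, url = "jdbc:postgresql://localhost:5432/testdb",
             tableName = "cars", mode = "append",
             user = "username", password = "password")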
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 6b254bb0d3..6cd0704003 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -887,17 +887,17 @@ setMethod("sampleRDD",
# Discards some random values to ensure each partition has a
# different random seed.
- runif(partIndex)
+ stats::runif(partIndex)
for (elem in part) {
if (withReplacement) {
- count <- rpois(1, fraction)
+ count <- stats::rpois(1, fraction)
if (count > 0) {
res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
len <- len + count
}
} else {
- if (runif(1) < fraction) {
+ if (stats::runif(1) < fraction) {
len <- len + 1
res[[len]] <- elem
}
@@ -965,7 +965,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
set.seed(seed)
samples <- collectRDD(sampleRDD(x, withReplacement, fraction,
- as.integer(ceiling(runif(1,
+ as.integer(ceiling(stats::runif(1,
-MAXINT,
MAXINT)))))
# If the first sample didn't turn out large enough, keep trying to
@@ -973,7 +973,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
# multiplier for the initial size
while (length(samples) < total)
samples <- collectRDD(sampleRDD(x, withReplacement, fraction,
- as.integer(ceiling(runif(1,
+ as.integer(ceiling(stats::runif(1,
-MAXINT,
MAXINT)))))
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index a9cd2d85f8..572e71e25b 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -115,7 +115,7 @@ infer_type <- function(x) {
#' Get Runtime Config from the current active SparkSession
#'
#' Get Runtime Config from the current active SparkSession.
-#' To change SparkSession Runtime Config, please see `sparkR.session()`.
+#' To change SparkSession Runtime Config, please see \code{sparkR.session()}.
#'
#' @param key (optional) The key of the config to get, if omitted, all config is returned
#' @param defaultValue (optional) The default value of the config to return if the config is not
@@ -720,11 +720,11 @@ dropTempView <- function(viewName) {
#'
#' Returns the dataset in a data source as a SparkDataFrame
#'
-#' The data source is specified by the `source` and a set of options(...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
#' "spark.sql.sources.default" will be used. \cr
-#' Similar to R read.csv, when `source` is "csv", by default, a value of "NA" will be interpreted
-#' as NA.
+#' Similar to R read.csv, when \code{source} is "csv", by default, a value of "NA" will be
+#' interpreted as NA.
#'
#' @param path The path of files to load
#' @param source The name of external data source
@@ -791,8 +791,8 @@ loadDF <- function(x, ...) {
#' Creates an external table based on the dataset in a data source,
#' Returns a SparkDataFrame associated with the external table.
#'
-#' The data source is specified by the `source` and a set of options(...).
-#' If `source` is not specified, the default data source configured by
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
#' "spark.sql.sources.default" will be used.
#'
#' @param tableName a name of the table.
@@ -830,22 +830,22 @@ createExternalTable <- function(x, ...) {
#' Additional JDBC database connection properties can be set (...)
#'
#' Only one of partitionColumn or predicates should be set. Partitions of the table will be
-#' retrieved in parallel based on the `numPartitions` or by the predicates.
+#' retrieved in parallel based on the \code{numPartitions} or by the predicates.
#'
#' Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash
#' your external database systems.
#'
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`
+#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}
#' @param tableName the name of the table in the external database
#' @param partitionColumn the name of a column of integral type that will be used for partitioning
-#' @param lowerBound the minimum value of `partitionColumn` used to decide partition stride
-#' @param upperBound the maximum value of `partitionColumn` used to decide partition stride
-#' @param numPartitions the number of partitions, This, along with `lowerBound` (inclusive),
-#' `upperBound` (exclusive), form partition strides for generated WHERE
-#' clause expressions used to split the column `partitionColumn` evenly.
+#' @param lowerBound the minimum value of \code{partitionColumn} used to decide partition stride
+#' @param upperBound the maximum value of \code{partitionColumn} used to decide partition stride
+#' @param numPartitions the number of partitions. This, along with \code{lowerBound} (inclusive),
+#' \code{upperBound} (exclusive), form partition strides for generated WHERE
+#' clause expressions used to split the column \code{partitionColumn} evenly.
#' This defaults to SparkContext.defaultParallelism when unset.
#' @param predicates a list of conditions in the where clause; each one defines one partition
-#' @param ... additional JDBC database connection named propertie(s).
+#' @param ... additional JDBC database connection named properties.
#' @return SparkDataFrame
#' @rdname read.jdbc
#' @name read.jdbc
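
A hedged read.jdbc() sketch for the partitioned read described above; connection details are placeholders, and only one of partitionColumn/predicates should be supplied:

  df <- read.jdbc("jdbc:postgresql://localhost:5432/testdb", "cars",
                  partitionColumn = "gear", lowerBound = 3, upperBound = 6,
                  numPartitions = 3,
                  user = "username", password = "password")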
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index b55356b07d..ddd2ef2fcd 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -44,6 +44,7 @@ windowSpec <- function(sws) {
}
#' @rdname show
+#' @export
#' @note show(WindowSpec) since 2.0.0
setMethod("show", "WindowSpec",
function(object) {
@@ -125,11 +126,11 @@ setMethod("orderBy",
#' rowsBetween
#'
-#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive).
#'
-#' Both `start` and `end` are relative positions from the current row. For example, "0" means
-#' "current row", while "-1" means the row before the current row, and "5" means the fifth row
-#' after the current row.
+#' Both \code{start} and \code{end} are relative positions from the current row. For example,
+#' "0" means "current row", while "-1" means the row before the current row, and "5" means the
+#' fifth row after the current row.
#'
#' @param x a WindowSpec
#' @param start boundary start, inclusive.
@@ -157,12 +158,12 @@ setMethod("rowsBetween",
#' rangeBetween
#'
-#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive).
+#'
+#' Both \code{start} and \code{end} are relative from the current row. For example, "0" means
+#' "current row", while "-1" means one off before the current row, and "5" means the five off
+#' after the current row.
#'
-#' Both `start` and `end` are relative from the current row. For example, "0" means "current row",
-#' while "-1" means one off before the current row, and "5" means the five off after the
-#' current row.
-
#' @param x a WindowSpec
#' @param start boundary start, inclusive.
#' The frame is unbounded if this is the minimum long value.
@@ -195,8 +196,8 @@ setMethod("rangeBetween",
#' Define a windowing column.
#'
#' @param x a Column, usually one returned by window function(s).
-#' @param window a WindowSpec object. Can be created by `windowPartitionBy` or
-#' `windowOrderBy` and configured by other WindowSpec methods.
+#' @param window a WindowSpec object. Can be created by \code{windowPartitionBy} or
+#' \code{windowOrderBy} and configured by other WindowSpec methods.
#' @rdname over
#' @name over
#' @aliases over,Column,WindowSpec-method
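
A short window sketch tying rowsBetween() and over() together, with illustrative column names and an active SparkR session assumed:

  df <- createDataFrame(data.frame(dept = c("a", "a", "a", "b"),
                                   salary = c(100, 200, 300, 150)))
  ws <- orderBy(windowPartitionBy("dept"), "salary")
  ws_framed <- rowsBetween(ws, -1, 1)               # frame: previous row through next row
  head(select(df, df$dept, df$salary,
              over(rank(), ws),                     # rank within each dept
              over(avg(df$salary), ws_framed)))     # moving average over the 3-row frame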
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index af486e1ce2..539d91b0f8 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -284,7 +284,7 @@ setMethod("%in%",
#' otherwise
#'
#' If values in the specified column are null, returns the value.
-#' Can be used in conjunction with `when` to specify a default value for expressions.
+#' Can be used in conjunction with \code{when} to specify a default value for expressions.
#'
#' @param x a Column.
#' @param value value to replace when the corresponding entry in \code{x} is NA.
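
A small sketch of when() with otherwise() supplying the default branch (assuming an active SparkR session):

  df <- createDataFrame(data.frame(age = c(10, 25, NA)))
  label <- otherwise(when(df$age >= 18, "adult"), "minor")
  head(select(df, df$age, label))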
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index b3c10de71f..f042adddef 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1250,7 +1250,7 @@ setMethod("rint",
#' round
#'
-#' Returns the value of the column `e` rounded to 0 decimal places using HALF_UP rounding mode.
+#' Returns the value of the column \code{e} rounded to 0 decimal places using HALF_UP rounding mode.
#'
#' @param x Column to compute on.
#'
@@ -1974,7 +1974,7 @@ setMethod("atan2", signature(y = "Column"),
#' datediff
#'
-#' Returns the number of days from `start` to `end`.
+#' Returns the number of days from \code{start} to \code{end}.
#'
#' @param x start Column to use.
#' @param y end Column to use.
@@ -2043,7 +2043,7 @@ setMethod("levenshtein", signature(y = "Column"),
#' months_between
#'
-#' Returns number of months between dates `date1` and `date2`.
+#' Returns number of months between dates \code{date1} and \code{date2}.
#'
#' @param x start Column to use.
#' @param y end Column to use.
@@ -2430,7 +2430,7 @@ setMethod("add_months", signature(y = "Column", x = "numeric"),
#' date_add
#'
-#' Returns the date that is `days` days after `start`
+#' Returns the date that is \code{x} days after \code{y}
#'
#' @param y Column to compute on
#' @param x Number of days to add
@@ -2450,7 +2450,7 @@ setMethod("date_add", signature(y = "Column", x = "numeric"),
#' date_sub
#'
-#' Returns the date that is `days` days before `start`
+#' Returns the date that is \code{x} days before \code{y}
#'
#' @param y Column to compute on
#' @param x Number of days to subtract
@@ -3113,7 +3113,7 @@ setMethod("ifelse",
#' N = total number of rows in the partition
#' cume_dist(x) = number of values before (and including) x / N
#'
-#' This is equivalent to the CUME_DIST function in SQL.
+#' This is equivalent to the \code{CUME_DIST} function in SQL.
#'
#' @rdname cume_dist
#' @name cume_dist
@@ -3141,7 +3141,7 @@ setMethod("cume_dist",
#' and had three people tie for second place, you would say that all three were in second
#' place and that the next person came in third.
#'
-#' This is equivalent to the DENSE_RANK function in SQL.
+#' This is equivalent to the \code{DENSE_RANK} function in SQL.
#'
#' @rdname dense_rank
#' @name dense_rank
@@ -3159,11 +3159,11 @@ setMethod("dense_rank",
#' lag
#'
-#' Window function: returns the value that is `offset` rows before the current row, and
-#' `defaultValue` if there is less than `offset` rows before the current row. For example,
-#' an `offset` of one will return the previous row at any given point in the window partition.
+#' Window function: returns the value that is \code{offset} rows before the current row, and
+#' \code{defaultValue} if there are fewer than \code{offset} rows before the current row. For example,
+#' an \code{offset} of one will return the previous row at any given point in the window partition.
#'
-#' This is equivalent to the LAG function in SQL.
+#' This is equivalent to the \code{LAG} function in SQL.
#'
#' @param x the column as a character string or a Column to compute on.
#' @param offset the number of rows back from the current row from which to obtain a value.
@@ -3193,11 +3193,11 @@ setMethod("lag",
#' lead
#'
-#' Window function: returns the value that is `offset` rows after the current row, and
-#' `null` if there is less than `offset` rows after the current row. For example,
-#' an `offset` of one will return the next row at any given point in the window partition.
+#' Window function: returns the value that is \code{offset} rows after the current row, and
+#' NULL if there are fewer than \code{offset} rows after the current row. For example,
+#' an \code{offset} of one will return the next row at any given point in the window partition.
#'
-#' This is equivalent to the LEAD function in SQL.
+#' This is equivalent to the \code{LEAD} function in SQL.
#'
#' @param x Column to compute on
#' @param offset Number of rows to offset
@@ -3226,11 +3226,11 @@ setMethod("lead",
#' ntile
#'
-#' Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window
-#' partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second
+#' Window function: returns the ntile group id (from 1 to n inclusive) in an ordered window
+#' partition. For example, if n is 4, the first quarter of the rows will get value 1, the second
#' quarter will get 2, the third quarter will get 3, and the last quarter will get 4.
#'
-#' This is equivalent to the NTILE function in SQL.
+#' This is equivalent to the \code{NTILE} function in SQL.
#'
#' @param x Number of ntile groups
#'
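
A combined sketch of the window functions documented above (lag, lead, ntile), with illustrative column names and an active SparkR session assumed:

  df <- createDataFrame(data.frame(dept = c("a", "a", "a", "b"),
                                   salary = c(1, 2, 3, 10)))
  ws <- orderBy(windowPartitionBy("dept"), "salary")
  head(select(df, df$dept, df$salary,
              over(lag(df$salary, 1), ws),    # previous salary, NA for the first row in each dept
              over(lead(df$salary, 1), ws),   # next salary
              over(ntile(2), ws)))            # bucket id 1..2 within each dept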
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6610a25c8c..88884e6257 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -438,17 +438,17 @@ setGeneric("columns", function(x) {standardGeneric("columns") })
setGeneric("count", function(x) { standardGeneric("count") })
#' @rdname cov
-#' @param x a Column object or a SparkDataFrame.
-#' @param ... additional argument(s). If `x` is a Column object, a Column object
-#' should be provided. If `x` is a SparkDataFrame, two column names should
+#' @param x a Column or a SparkDataFrame.
+#' @param ... additional argument(s). If \code{x} is a Column, a Column
+#' should be provided. If \code{x} is a SparkDataFrame, two column names should
#' be provided.
#' @export
setGeneric("cov", function(x, ...) {standardGeneric("cov") })
#' @rdname corr
-#' @param x a Column object or a SparkDataFrame.
-#' @param ... additional argument(s). If `x` is a Column object, a Column object
-#' should be provided. If `x` is a SparkDataFrame, two column names should
+#' @param x a Column or a SparkDataFrame.
+#' @param ... additional argument(s). If \code{x} is a Column, a Column
+#' should be provided. If \code{x} is a SparkDataFrame, two column names should
#' be provided.
#' @export
setGeneric("corr", function(x, ...) {standardGeneric("corr") })
@@ -851,7 +851,7 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain
setGeneric("ascii", function(x) { standardGeneric("ascii") })
#' @param x Column to compute on or a GroupedData object.
-#' @param ... additional argument(s) when `x` is a GroupedData object.
+#' @param ... additional argument(s) when \code{x} is a GroupedData object.
#' @rdname avg
#' @export
setGeneric("avg", function(x, ...) { standardGeneric("avg") })
@@ -1339,7 +1339,6 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
setGeneric("spark.survreg", function(data, formula) { standardGeneric("spark.survreg") })
#' @rdname spark.lda
-#' @param ... Additional parameters to tune LDA.
#' @export
setGeneric("spark.lda", function(data, ...) { standardGeneric("spark.lda") })
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 3c85ada91a..e3479ef5fa 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -48,6 +48,7 @@ groupedData <- function(sgd) {
#' @rdname show
#' @aliases show,GroupedData-method
+#' @export
#' @note show(GroupedData) since 1.4.0
setMethod("show", "GroupedData",
function(object) {
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index b36fbcee17..a40310d194 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -131,7 +131,7 @@ predict_internal <- function(object, newData) {
#' This can be a character string naming a family function, a family function or
#' the result of a call to a family function. Refer R family at
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
-#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance
+#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance
#' weights as 1.0.
#' @param tol positive convergence tolerance of iterations.
#' @param maxIter integer giving the maximal number of IRLS iterations.
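
A short spark.glm() sketch for the parameters above; iris column names get their dots replaced by underscores on conversion, hence Sepal_Length:

  df <- createDataFrame(iris)
  model <- spark.glm(df, Sepal_Length ~ Sepal_Width + Species, family = "gaussian")
  summary(model)
  head(predict(model, df))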
@@ -197,7 +197,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
#' This can be a character string naming a family function, a family function or
#' the result of a call to a family function. Refer R family at
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
-#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance
+#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance
#' weights as 1.0.
#' @param epsilon positive convergence tolerance of iterations.
#' @param maxit integer giving the maximal number of IRLS iterations.
@@ -434,8 +434,8 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"),
#' operators are supported, including '~', '.', ':', '+', and '-'.
#' @param isotonic Whether the output sequence should be isotonic/increasing (TRUE) or
#' antitonic/decreasing (FALSE)
-#' @param featureIndex The index of the feature if \code{featuresCol} is a vector column (default: `0`),
-#' no effect otherwise
+#' @param featureIndex The index of the feature if \code{featuresCol} is a vector column
+#' (default: 0), no effect otherwise
#' @param weightCol The weight column name.
#' @return \code{spark.isoreg} returns a fitted Isotonic Regression model
#' @rdname spark.isoreg
@@ -647,7 +647,7 @@ setMethod("predict", signature(object = "KMeansModel"),
#' @rdname spark.naiveBayes
#' @aliases spark.naiveBayes,SparkDataFrame,formula-method
#' @name spark.naiveBayes
-#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/}
+#' @seealso e1071: \url{https://cran.r-project.org/package=e1071}
#' @export
#' @examples
#' \dontrun{
@@ -815,7 +815,7 @@ read.ml <- function(path) {
#' Note that operator '.' is not supported currently.
#' @return \code{spark.survreg} returns a fitted AFT survival regression model.
#' @rdname spark.survreg
-#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/}
+#' @seealso survival: \url{https://cran.r-project.org/package=survival}
#' @export
#' @examples
#' \dontrun{
@@ -870,10 +870,11 @@ setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula
#' @param customizedStopWords stopwords that need to be removed from the given corpus. Ignore the
#' parameter if libSVM-format column is used as the features column.
#' @param maxVocabSize maximum vocabulary size, default 1 << 18
+#' @param ... additional argument(s) passed to the method.
#' @return \code{spark.lda} returns a fitted Latent Dirichlet Allocation model
#' @rdname spark.lda
#' @aliases spark.lda,SparkDataFrame-method
-#' @seealso topicmodels: \url{https://cran.r-project.org/web/packages/topicmodels/}
+#' @seealso topicmodels: \url{https://cran.r-project.org/package=topicmodels}
#' @export
#' @examples
#' \dontrun{
@@ -962,7 +963,7 @@ setMethod("predict", signature(object = "AFTSurvivalRegressionModel"),
#' @return \code{spark.gaussianMixture} returns a fitted multivariate gaussian mixture model.
#' @rdname spark.gaussianMixture
#' @name spark.gaussianMixture
-#' @seealso mixtools: \url{https://cran.r-project.org/web/packages/mixtools/}
+#' @seealso mixtools: \url{https://cran.r-project.org/package=mixtools}
#' @export
#' @examples
#' \dontrun{
@@ -1075,7 +1076,7 @@ setMethod("predict", signature(object = "GaussianMixtureModel"),
#' @param numUserBlocks number of user blocks used to parallelize computation (> 0).
#' @param numItemBlocks number of item blocks used to parallelize computation (> 0).
#' @param checkpointInterval number of checkpoint intervals (>= 1) or disable checkpoint (-1).
-#'
+#' @param ... additional argument(s) passed to the method.
#' @return \code{spark.als} returns a fitted ALS model
#' @rdname spark.als
#' @aliases spark.als,SparkDataFrame-method
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index f0605db1e9..4dee3245f9 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -917,19 +917,19 @@ setMethod("sampleByKey",
len <- 0
# mixing because the initial seeds are close to each other
- runif(10)
+ stats::runif(10)
for (elem in part) {
if (elem[[1]] %in% names(fractions)) {
frac <- as.numeric(fractions[which(elem[[1]] == names(fractions))])
if (withReplacement) {
- count <- rpois(1, frac)
+ count <- stats::rpois(1, frac)
if (count > 0) {
res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
len <- len + count
}
} else {
- if (runif(1) < frac) {
+ if (stats::runif(1) < frac) {
len <- len + 1
res[[len]] <- elem
}
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 8ea24d8172..dcd7198f41 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -29,9 +29,9 @@ setOldClass("jobj")
#' @param col1 name of the first column. Distinct items will make the first item of each row.
#' @param col2 name of the second column. Distinct items will make the column names of the output.
#' @return a local R data.frame representing the contingency table. The first column of each row
-#' will be the distinct values of `col1` and the column names will be the distinct values
-#' of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no
-#' occurrences will have zero as their counts.
+#' will be the distinct values of \code{col1} and the column names will be the distinct values
+#' of \code{col2}. The name of the first column will be "\code{col1}_\code{col2}". Pairs
+#' that have no occurrences will have zero as their counts.
#'
#' @rdname crosstab
#' @name crosstab
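
A crosstab() sketch illustrating the "col1_col2" naming described above:

  df <- createDataFrame(data.frame(cyl = mtcars$cyl, gear = mtcars$gear))
  ct <- crosstab(df, "cyl", "gear")   # local R data.frame; first column is named "cyl_gear"
  ct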
@@ -116,7 +116,7 @@ setMethod("corr",
#'
#' @param x A SparkDataFrame.
#' @param cols A vector column names to search frequent items in.
-#' @param support (Optional) The minimum frequency for an item to be considered `frequent`.
+#' @param support (Optional) The minimum frequency for an item to be considered \code{frequent}.
#' Should be greater than 1e-4. Default support = 0.01.
#' @return a local R data.frame with the frequent items in each column
#'
@@ -142,9 +142,9 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
#'
#' Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
#' The result of this algorithm has the following deterministic bound:
-#' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to
-#' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the
-#' *exact* rank of `x` is close to (p * N). More precisely,
+#' If the SparkDataFrame has N elements and if we request the quantile at probability p up to
+#' error err, then the algorithm will return a sample x from the SparkDataFrame so that the
+#' *exact* rank of x is close to (p * N). More precisely,
#' floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
#' This method implements a variation of the Greenwald-Khanna algorithm (with some speed
#' optimizations). The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670