From 71afeeea4ec8e67edc95b5d504c557c88a2598b9 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Mon, 22 Aug 2016 15:53:10 -0700 Subject: [SPARK-16508][SPARKR] doc updates and more CRAN check fixes ## What changes were proposed in this pull request? replace ``` ` ``` in code doc with `\code{thing}` remove added `...` for drop(DataFrame) fix remaining CRAN check warnings ## How was this patch tested? create doc with knitr junyangq Author: Felix Cheung Closes #14734 from felixcheung/rdoccleanup. --- R/pkg/NAMESPACE | 6 ++++- R/pkg/R/DataFrame.R | 71 ++++++++++++++++++++++++++-------------------------- R/pkg/R/RDD.R | 10 ++++---- R/pkg/R/SQLContext.R | 30 +++++++++++----------- R/pkg/R/WindowSpec.R | 23 +++++++++-------- R/pkg/R/column.R | 2 +- R/pkg/R/functions.R | 36 +++++++++++++------------- R/pkg/R/generics.R | 15 ++++++----- R/pkg/R/group.R | 1 + R/pkg/R/mllib.R | 19 +++++++------- R/pkg/R/pairRDD.R | 6 ++--- R/pkg/R/stats.R | 14 +++++------ 12 files changed, 119 insertions(+), 114 deletions(-) (limited to 'R') diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index e1b87b28d3..709057675e 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,5 +1,9 @@ # Imports from base R -importFrom(methods, setGeneric, setMethod, setOldClass) +# Do not include stats:: "rpois", "runif" - causes error at runtime +importFrom("methods", "setGeneric", "setMethod", "setOldClass") +importFrom("methods", "is", "new", "signature", "show") +importFrom("stats", "gaussian", "setNames") +importFrom("utils", "download.file", "packageVersion", "untar") # Disable native libraries till we figure out how to package it # See SPARKR-7839 diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 540dc3122d..52a6628ad7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -150,7 +150,7 @@ setMethod("explain", #' isLocal #' -#' Returns True if the `collect` and `take` methods can be run locally +#' Returns True if the \code{collect} and \code{take} methods can be run locally #' (without any Spark executors). #' #' @param x A SparkDataFrame @@ -182,7 +182,7 @@ setMethod("isLocal", #' @param numRows the number of rows to print. Defaults to 20. #' @param truncate whether truncate long strings. If \code{TRUE}, strings more than #' 20 characters will be truncated. However, if set greater than zero, -#' truncates strings longer than `truncate` characters and all cells +#' truncates strings longer than \code{truncate} characters and all cells #' will be aligned right. #' @param ... further arguments to be passed to or from other methods. #' @family SparkDataFrame functions @@ -642,10 +642,10 @@ setMethod("unpersist", #' The following options for repartition are possible: #' \itemize{ #' \item{1.} {Return a new SparkDataFrame partitioned by -#' the given columns into `numPartitions`.} -#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.} +#' the given columns into \code{numPartitions}.} +#' \item{2.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.} #' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s), -#' using `spark.sql.shuffle.partitions` as number of partitions.} +#' using \code{spark.sql.shuffle.partitions} as number of partitions.} #'} #' @param x a SparkDataFrame. #' @param numPartitions the number of partitions to use. @@ -1132,9 +1132,8 @@ setMethod("take", #' Head #' -#' Return the first NUM rows of a SparkDataFrame as a R data.frame. 
If NUM is NULL, -#' then head() returns the first 6 rows in keeping with the current data.frame -#' convention in R. +#' Return the first \code{num} rows of a SparkDataFrame as a R data.frame. If \code{num} is not +#' specified, then head() returns the first 6 rows as with R data.frame. #' #' @param x a SparkDataFrame. #' @param num the number of rows to return. Default is 6. @@ -1406,11 +1405,11 @@ setMethod("dapplyCollect", #' #' @param cols grouping columns. #' @param func a function to be applied to each group partition specified by grouping -#' column of the SparkDataFrame. The function `func` takes as argument +#' column of the SparkDataFrame. The function \code{func} takes as argument #' a key - grouping columns and a data frame - a local R data.frame. -#' The output of `func` is a local R data.frame. +#' The output of \code{func} is a local R data.frame. #' @param schema the schema of the resulting SparkDataFrame after the function is applied. -#' The schema must match to output of `func`. It has to be defined for each +#' The schema must match to output of \code{func}. It has to be defined for each #' output column with preferred output column name and corresponding data type. #' @return A SparkDataFrame. #' @family SparkDataFrame functions @@ -1497,9 +1496,9 @@ setMethod("gapply", #' #' @param cols grouping columns. #' @param func a function to be applied to each group partition specified by grouping -#' column of the SparkDataFrame. The function `func` takes as argument +#' column of the SparkDataFrame. The function \code{func} takes as argument #' a key - grouping columns and a data frame - a local R data.frame. -#' The output of `func` is a local R data.frame. +#' The output of \code{func} is a local R data.frame. #' @return A data.frame. #' @family SparkDataFrame functions #' @aliases gapplyCollect,SparkDataFrame-method @@ -1657,7 +1656,7 @@ setMethod("$", signature(x = "SparkDataFrame"), getColumn(x, name) }) -#' @param value a Column or NULL. If NULL, the specified Column is dropped. +#' @param value a Column or \code{NULL}. If \code{NULL}, the specified Column is dropped. #' @rdname select #' @name $<- #' @aliases $<-,SparkDataFrame-method @@ -1747,7 +1746,7 @@ setMethod("[", signature(x = "SparkDataFrame"), #' @family subsetting functions #' @examples #' \dontrun{ -#' # Columns can be selected using `[[` and `[` +#' # Columns can be selected using [[ and [ #' df[[2]] == df[["age"]] #' df[,2] == df[,"age"] #' df[,c("name", "age")] @@ -1792,7 +1791,7 @@ setMethod("subset", signature(x = "SparkDataFrame"), #' select(df, df$name, df$age + 1) #' select(df, c("col1", "col2")) #' select(df, list(df$name, df$age + 1)) -#' # Similar to R data frames columns can also be selected using `$` +#' # Similar to R data frames columns can also be selected using $ #' df[,df$age] #' } #' @note select(SparkDataFrame, character) since 1.4.0 @@ -2443,7 +2442,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #' Return a new SparkDataFrame containing the union of rows #' #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame -#' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL. +#' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL. #' Note that this does not remove duplicate rows across the two SparkDataFrames. #' #' @param x A SparkDataFrame @@ -2486,7 +2485,7 @@ setMethod("unionAll", #' Union two or more SparkDataFrames #' -#' Union two or more SparkDataFrames. 
This is equivalent to `UNION ALL` in SQL. +#' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL. #' Note that this does not remove duplicate rows across the two SparkDataFrames. #' #' @param x a SparkDataFrame. @@ -2519,7 +2518,7 @@ setMethod("rbind", #' Intersect #' #' Return a new SparkDataFrame containing rows only in both this SparkDataFrame -#' and another SparkDataFrame. This is equivalent to `INTERSECT` in SQL. +#' and another SparkDataFrame. This is equivalent to \code{INTERSECT} in SQL. #' #' @param x A SparkDataFrame #' @param y A SparkDataFrame @@ -2547,7 +2546,7 @@ setMethod("intersect", #' except #' #' Return a new SparkDataFrame containing rows in this SparkDataFrame -#' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL. +#' but not in another SparkDataFrame. This is equivalent to \code{EXCEPT} in SQL. #' #' @param x a SparkDataFrame. #' @param y a SparkDataFrame. @@ -2576,8 +2575,8 @@ setMethod("except", #' Save the contents of SparkDataFrame to a data source. #' -#' The data source is specified by the `source` and a set of options (...). -#' If `source` is not specified, the default data source configured by +#' The data source is specified by the \code{source} and a set of options (...). +#' If \code{source} is not specified, the default data source configured by #' spark.sql.sources.default will be used. #' #' Additionally, mode is used to specify the behavior of the save operation when data already @@ -2613,7 +2612,7 @@ setMethod("except", #' @note write.df since 1.4.0 setMethod("write.df", signature(df = "SparkDataFrame", path = "character"), - function(df, path, source = NULL, mode = "error", ...){ + function(df, path, source = NULL, mode = "error", ...) { if (is.null(source)) { source <- getDefaultSqlSource() } @@ -2635,14 +2634,14 @@ setMethod("write.df", #' @note saveDF since 1.4.0 setMethod("saveDF", signature(df = "SparkDataFrame", path = "character"), - function(df, path, source = NULL, mode = "error", ...){ + function(df, path, source = NULL, mode = "error", ...) { write.df(df, path, source, mode, ...) }) #' Save the contents of the SparkDataFrame to a data source as a table #' -#' The data source is specified by the `source` and a set of options (...). -#' If `source` is not specified, the default data source configured by +#' The data source is specified by the \code{source} and a set of options (...). +#' If \code{source} is not specified, the default data source configured by #' spark.sql.sources.default will be used. #' #' Additionally, mode is used to specify the behavior of the save operation when @@ -2675,7 +2674,7 @@ setMethod("saveDF", #' @note saveAsTable since 1.4.0 setMethod("saveAsTable", signature(df = "SparkDataFrame", tableName = "character"), - function(df, tableName, source = NULL, mode="error", ...){ + function(df, tableName, source = NULL, mode="error", ...) { if (is.null(source)) { source <- getDefaultSqlSource() } @@ -2752,11 +2751,11 @@ setMethod("summary", #' @param how "any" or "all". #' if "any", drop a row if it contains any nulls. #' if "all", drop a row only if all its values are null. -#' if minNonNulls is specified, how is ignored. +#' if \code{minNonNulls} is specified, how is ignored. #' @param minNonNulls if specified, drop rows that have less than -#' minNonNulls non-null values. +#' \code{minNonNulls} non-null values. #' This overwrites the how parameter. -#' @param cols optional list of column names to consider. 
In `fillna`, +#' @param cols optional list of column names to consider. In \code{fillna}, #' columns specified in cols that do not have matching data #' type are ignored. For example, if value is a character, and #' subset contains a non-character column, then the non-character @@ -2879,8 +2878,8 @@ setMethod("fillna", #' in your system to accommodate the contents. #' #' @param x a SparkDataFrame. -#' @param row.names NULL or a character vector giving the row names for the data frame. -#' @param optional If `TRUE`, converting column names is optional. +#' @param row.names \code{NULL} or a character vector giving the row names for the data frame. +#' @param optional If \code{TRUE}, converting column names is optional. #' @param ... additional arguments to pass to base::as.data.frame. #' @return A data.frame. #' @family SparkDataFrame functions @@ -3058,7 +3057,7 @@ setMethod("str", #' @note drop since 2.0.0 setMethod("drop", signature(x = "SparkDataFrame"), - function(x, col, ...) { + function(x, col) { stopifnot(class(col) == "character" || class(col) == "Column") if (class(col) == "Column") { @@ -3218,8 +3217,8 @@ setMethod("histogram", #' and to not change the existing data. #' } #' -#' @param x s SparkDataFrame. -#' @param url JDBC database url of the form `jdbc:subprotocol:subname`. +#' @param x a SparkDataFrame. +#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}. #' @param tableName yhe name of the table in the external database. #' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default). #' @param ... additional JDBC database connection properties. @@ -3237,7 +3236,7 @@ setMethod("histogram", #' @note write.jdbc since 2.0.0 setMethod("write.jdbc", signature(x = "SparkDataFrame", url = "character", tableName = "character"), - function(x, url, tableName, mode = "error", ...){ + function(x, url, tableName, mode = "error", ...) { jmode <- convertToJSaveMode(mode) jprops <- varargsToJProperties(...) write <- callJMethod(x@sdf, "write") diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 6b254bb0d3..6cd0704003 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -887,17 +887,17 @@ setMethod("sampleRDD", # Discards some random values to ensure each partition has a # different random seed. 
- runif(partIndex) + stats::runif(partIndex) for (elem in part) { if (withReplacement) { - count <- rpois(1, fraction) + count <- stats::rpois(1, fraction) if (count > 0) { res[ (len + 1) : (len + count) ] <- rep(list(elem), count) len <- len + count } } else { - if (runif(1) < fraction) { + if (stats::runif(1) < fraction) { len <- len + 1 res[[len]] <- elem } @@ -965,7 +965,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", set.seed(seed) samples <- collectRDD(sampleRDD(x, withReplacement, fraction, - as.integer(ceiling(runif(1, + as.integer(ceiling(stats::runif(1, -MAXINT, MAXINT))))) # If the first sample didn't turn out large enough, keep trying to @@ -973,7 +973,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", # multiplier for thei initial size while (length(samples) < total) samples <- collectRDD(sampleRDD(x, withReplacement, fraction, - as.integer(ceiling(runif(1, + as.integer(ceiling(stats::runif(1, -MAXINT, MAXINT))))) diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index a9cd2d85f8..572e71e25b 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -115,7 +115,7 @@ infer_type <- function(x) { #' Get Runtime Config from the current active SparkSession #' #' Get Runtime Config from the current active SparkSession. -#' To change SparkSession Runtime Config, please see `sparkR.session()`. +#' To change SparkSession Runtime Config, please see \code{sparkR.session()}. #' #' @param key (optional) The key of the config to get, if omitted, all config is returned #' @param defaultValue (optional) The default value of the config to return if they config is not @@ -720,11 +720,11 @@ dropTempView <- function(viewName) { #' #' Returns the dataset in a data source as a SparkDataFrame #' -#' The data source is specified by the `source` and a set of options(...). -#' If `source` is not specified, the default data source configured by +#' The data source is specified by the \code{source} and a set of options(...). +#' If \code{source} is not specified, the default data source configured by #' "spark.sql.sources.default" will be used. \cr -#' Similar to R read.csv, when `source` is "csv", by default, a value of "NA" will be interpreted -#' as NA. +#' Similar to R read.csv, when \code{source} is "csv", by default, a value of "NA" will be +#' interpreted as NA. #' #' @param path The path of files to load #' @param source The name of external data source @@ -791,8 +791,8 @@ loadDF <- function(x, ...) { #' Creates an external table based on the dataset in a data source, #' Returns a SparkDataFrame associated with the external table. #' -#' The data source is specified by the `source` and a set of options(...). -#' If `source` is not specified, the default data source configured by +#' The data source is specified by the \code{source} and a set of options(...). +#' If \code{source} is not specified, the default data source configured by #' "spark.sql.sources.default" will be used. #' #' @param tableName a name of the table. @@ -830,22 +830,22 @@ createExternalTable <- function(x, ...) { #' Additional JDBC database connection properties can be set (...) #' #' Only one of partitionColumn or predicates should be set. Partitions of the table will be -#' retrieved in parallel based on the `numPartitions` or by the predicates. +#' retrieved in parallel based on the \code{numPartitions} or by the predicates. 
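For reference, a minimal sketch of how the JDBC options documented in this hunk fit together, including the `write.jdbc` counterpart documented earlier in this patch. The connection URL, table names, column names, and credentials below are placeholders, and it is assumed the JDBC driver jar is already available to the Spark session.

```r
library(SparkR)
sparkR.session()  # assumes the JDBC driver jar is on the Spark classpath

url <- "jdbc:postgresql://dbhost:5432/mydb"  # placeholder connection URL

# Parallel read: partition strides over event_id are derived from
# lowerBound / upperBound / numPartitions
events <- read.jdbc(url, "events",
                    partitionColumn = "event_id",
                    lowerBound = 0, upperBound = 1000000, numPartitions = 8,
                    user = "spark", password = "secret")

# Alternative: one partition per predicate instead of a partition column
eu_us <- read.jdbc(url, "events",
                   predicates = list("region = 'EU'", "region = 'US'"),
                   user = "spark", password = "secret")

# Write side: append the rows to an existing table
write.jdbc(events, url, "events_archive", mode = "append",
           user = "spark", password = "secret")
```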
#' #' Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash #' your external database systems. #' -#' @param url JDBC database url of the form `jdbc:subprotocol:subname` +#' @param url JDBC database url of the form \code{jdbc:subprotocol:subname} #' @param tableName the name of the table in the external database #' @param partitionColumn the name of a column of integral type that will be used for partitioning -#' @param lowerBound the minimum value of `partitionColumn` used to decide partition stride -#' @param upperBound the maximum value of `partitionColumn` used to decide partition stride -#' @param numPartitions the number of partitions, This, along with `lowerBound` (inclusive), -#' `upperBound` (exclusive), form partition strides for generated WHERE -#' clause expressions used to split the column `partitionColumn` evenly. +#' @param lowerBound the minimum value of \code{partitionColumn} used to decide partition stride +#' @param upperBound the maximum value of \code{partitionColumn} used to decide partition stride +#' @param numPartitions the number of partitions, This, along with \code{lowerBound} (inclusive), +#' \code{upperBound} (exclusive), form partition strides for generated WHERE +#' clause expressions used to split the column \code{partitionColumn} evenly. #' This defaults to SparkContext.defaultParallelism when unset. #' @param predicates a list of conditions in the where clause; each one defines one partition -#' @param ... additional JDBC database connection named propertie(s). +#' @param ... additional JDBC database connection named properties. #' @return SparkDataFrame #' @rdname read.jdbc #' @name read.jdbc diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R index b55356b07d..ddd2ef2fcd 100644 --- a/R/pkg/R/WindowSpec.R +++ b/R/pkg/R/WindowSpec.R @@ -44,6 +44,7 @@ windowSpec <- function(sws) { } #' @rdname show +#' @export #' @note show(WindowSpec) since 2.0.0 setMethod("show", "WindowSpec", function(object) { @@ -125,11 +126,11 @@ setMethod("orderBy", #' rowsBetween #' -#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). +#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive). #' -#' Both `start` and `end` are relative positions from the current row. For example, "0" means -#' "current row", while "-1" means the row before the current row, and "5" means the fifth row -#' after the current row. +#' Both \code{start} and \code{end} are relative positions from the current row. For example, +#' "0" means "current row", while "-1" means the row before the current row, and "5" means the +#' fifth row after the current row. #' #' @param x a WindowSpec #' @param start boundary start, inclusive. @@ -157,12 +158,12 @@ setMethod("rowsBetween", #' rangeBetween #' -#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). +#' Defines the frame boundaries, from \code{start} (inclusive) to \code{end} (inclusive). +#' +#' Both \code{start} and \code{end} are relative from the current row. For example, "0" means +#' "current row", while "-1" means one off before the current row, and "5" means the five off +#' after the current row. #' -#' Both `start` and `end` are relative from the current row. For example, "0" means "current row", -#' while "-1" means one off before the current row, and "5" means the five off after the -#' current row. - #' @param x a WindowSpec #' @param start boundary start, inclusive. 
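A short sketch of how the two frame definitions are typically used together with \code{over}, assuming an active SparkR session; the column names and data are illustrative only.

```r
# assumes library(SparkR) is attached and sparkR.session() has been called
df <- createDataFrame(data.frame(dept   = c("a", "a", "a", "b", "b"),
                                 salary = c(100, 200, 300, 150, 250)))

ws <- orderBy(windowPartitionBy("dept"), "salary")

# rowsBetween: physical row offsets; previous row through next row
moving <- rowsBetween(ws, -1, 1)
df <- withColumn(df, "moving_avg", over(avg(df$salary), moving))

# rangeBetween: value-based offsets on the ordering column;
# rows whose salary is within [current salary - 100, current salary]
banded <- rangeBetween(ws, -100, 0)
df <- withColumn(df, "band_sum", over(sum(df$salary), banded))

head(df)
```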
#' The frame is unbounded if this is the minimum long value. @@ -195,8 +196,8 @@ setMethod("rangeBetween", #' Define a windowing column. #' #' @param x a Column, usually one returned by window function(s). -#' @param window a WindowSpec object. Can be created by `windowPartitionBy` or -#' `windowOrderBy` and configured by other WindowSpec methods. +#' @param window a WindowSpec object. Can be created by \code{windowPartitionBy} or +#' \code{windowOrderBy} and configured by other WindowSpec methods. #' @rdname over #' @name over #' @aliases over,Column,WindowSpec-method diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index af486e1ce2..539d91b0f8 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -284,7 +284,7 @@ setMethod("%in%", #' otherwise #' #' If values in the specified column are null, returns the value. -#' Can be used in conjunction with `when` to specify a default value for expressions. +#' Can be used in conjunction with \code{when} to specify a default value for expressions. #' #' @param x a Column. #' @param value value to replace when the corresponding entry in \code{x} is NA. diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index b3c10de71f..f042adddef 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1250,7 +1250,7 @@ setMethod("rint", #' round #' -#' Returns the value of the column `e` rounded to 0 decimal places using HALF_UP rounding mode. +#' Returns the value of the column \code{e} rounded to 0 decimal places using HALF_UP rounding mode. #' #' @param x Column to compute on. #' @@ -1974,7 +1974,7 @@ setMethod("atan2", signature(y = "Column"), #' datediff #' -#' Returns the number of days from `start` to `end`. +#' Returns the number of days from \code{start} to \code{end}. #' #' @param x start Column to use. #' @param y end Column to use. @@ -2043,7 +2043,7 @@ setMethod("levenshtein", signature(y = "Column"), #' months_between #' -#' Returns number of months between dates `date1` and `date2`. +#' Returns number of months between dates \code{date1} and \code{date2}. #' #' @param x start Column to use. #' @param y end Column to use. @@ -2430,7 +2430,7 @@ setMethod("add_months", signature(y = "Column", x = "numeric"), #' date_add #' -#' Returns the date that is `days` days after `start` +#' Returns the date that is \code{x} days after #' #' @param y Column to compute on #' @param x Number of days to add @@ -2450,7 +2450,7 @@ setMethod("date_add", signature(y = "Column", x = "numeric"), #' date_sub #' -#' Returns the date that is `days` days before `start` +#' Returns the date that is \code{x} days before #' #' @param y Column to compute on #' @param x Number of days to substract @@ -3113,7 +3113,7 @@ setMethod("ifelse", #' N = total number of rows in the partition #' cume_dist(x) = number of values before (and including) x / N #' -#' This is equivalent to the CUME_DIST function in SQL. +#' This is equivalent to the \code{CUME_DIST} function in SQL. #' #' @rdname cume_dist #' @name cume_dist @@ -3141,7 +3141,7 @@ setMethod("cume_dist", #' and had three people tie for second place, you would say that all three were in second #' place and that the next person came in third. #' -#' This is equivalent to the DENSE_RANK function in SQL. +#' This is equivalent to the \code{DENSE_RANK} function in SQL. 
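A small illustration of the tie handling described above, contrasting \code{dense_rank} (no gaps) with \code{rank} (gaps after ties). It assumes an active SparkR session; the scores are made up.

```r
# assumes library(SparkR) is attached and sparkR.session() has been called
scores <- createDataFrame(data.frame(season = rep("2016", 4),
                                     player = c("p1", "p2", "p3", "p4"),
                                     score  = c(300, 300, 250, 200)))

ws <- orderBy(windowPartitionBy("season"), desc(scores$score))
scores <- withColumn(scores, "rank", over(rank(), ws))
scores <- withColumn(scores, "dense_rank", over(dense_rank(), ws))
head(scores)
# with two players tied at 300: rank yields 1, 1, 3, 4 while dense_rank yields 1, 1, 2, 3
```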
#' #' @rdname dense_rank #' @name dense_rank @@ -3159,11 +3159,11 @@ setMethod("dense_rank", #' lag #' -#' Window function: returns the value that is `offset` rows before the current row, and -#' `defaultValue` if there is less than `offset` rows before the current row. For example, -#' an `offset` of one will return the previous row at any given point in the window partition. +#' Window function: returns the value that is \code{offset} rows before the current row, and +#' \code{defaultValue} if there is less than \code{offset} rows before the current row. For example, +#' an \code{offset} of one will return the previous row at any given point in the window partition. #' -#' This is equivalent to the LAG function in SQL. +#' This is equivalent to the \code{LAG} function in SQL. #' #' @param x the column as a character string or a Column to compute on. #' @param offset the number of rows back from the current row from which to obtain a value. @@ -3193,11 +3193,11 @@ setMethod("lag", #' lead #' -#' Window function: returns the value that is `offset` rows after the current row, and -#' `null` if there is less than `offset` rows after the current row. For example, -#' an `offset` of one will return the next row at any given point in the window partition. +#' Window function: returns the value that is \code{offset} rows after the current row, and +#' NULL if there is less than \code{offset} rows after the current row. For example, +#' an \code{offset} of one will return the next row at any given point in the window partition. #' -#' This is equivalent to the LEAD function in SQL. +#' This is equivalent to the \code{LEAD} function in SQL. #' #' @param x Column to compute on #' @param offset Number of rows to offset @@ -3226,11 +3226,11 @@ setMethod("lead", #' ntile #' -#' Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window -#' partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second +#' Window function: returns the ntile group id (from 1 to n inclusive) in an ordered window +#' partition. For example, if n is 4, the first quarter of the rows will get value 1, the second #' quarter will get 2, the third quarter will get 3, and the last quarter will get 4. #' -#' This is equivalent to the NTILE function in SQL. +#' This is equivalent to the \code{NTILE} function in SQL. #' #' @param x Number of ntile groups #' diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6610a25c8c..88884e6257 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -438,17 +438,17 @@ setGeneric("columns", function(x) {standardGeneric("columns") }) setGeneric("count", function(x) { standardGeneric("count") }) #' @rdname cov -#' @param x a Column object or a SparkDataFrame. -#' @param ... additional argument(s). If `x` is a Column object, a Column object -#' should be provided. If `x` is a SparkDataFrame, two column names should +#' @param x a Column or a SparkDataFrame. +#' @param ... additional argument(s). If \code{x} is a Column, a Column +#' should be provided. If \code{x} is a SparkDataFrame, two column names should #' be provided. #' @export setGeneric("cov", function(x, ...) {standardGeneric("cov") }) #' @rdname corr -#' @param x a Column object or a SparkDataFrame. -#' @param ... additional argument(s). If `x` is a Column object, a Column object -#' should be provided. If `x` is a SparkDataFrame, two column names should +#' @param x a Column or a SparkDataFrame. +#' @param ... additional argument(s). 
If \code{x} is a Column, a Column +#' should be provided. If \code{x} is a SparkDataFrame, two column names should #' be provided. #' @export setGeneric("corr", function(x, ...) {standardGeneric("corr") }) @@ -851,7 +851,7 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain setGeneric("ascii", function(x) { standardGeneric("ascii") }) #' @param x Column to compute on or a GroupedData object. -#' @param ... additional argument(s) when `x` is a GroupedData object. +#' @param ... additional argument(s) when \code{x} is a GroupedData object. #' @rdname avg #' @export setGeneric("avg", function(x, ...) { standardGeneric("avg") }) @@ -1339,7 +1339,6 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s setGeneric("spark.survreg", function(data, formula) { standardGeneric("spark.survreg") }) #' @rdname spark.lda -#' @param ... Additional parameters to tune LDA. #' @export setGeneric("spark.lda", function(data, ...) { standardGeneric("spark.lda") }) diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 3c85ada91a..e3479ef5fa 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -48,6 +48,7 @@ groupedData <- function(sgd) { #' @rdname show #' @aliases show,GroupedData-method +#' @export #' @note show(GroupedData) since 1.4.0 setMethod("show", "GroupedData", function(object) { diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index b36fbcee17..a40310d194 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -131,7 +131,7 @@ predict_internal <- function(object, newData) { #' This can be a character string naming a family function, a family function or #' the result of a call to a family function. Refer R family at #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. -#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance +#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance #' weights as 1.0. #' @param tol positive convergence tolerance of iterations. #' @param maxIter integer giving the maximal number of IRLS iterations. @@ -197,7 +197,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), #' This can be a character string naming a family function, a family function or #' the result of a call to a family function. Refer R family at #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. -#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance +#' @param weightCol the weight column name. If this is not set or \code{NULL}, we treat all instance #' weights as 1.0. #' @param epsilon positive convergence tolerance of iterations. #' @param maxit integer giving the maximal number of IRLS iterations. @@ -434,8 +434,8 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"), #' operators are supported, including '~', '.', ':', '+', and '-'. #' @param isotonic Whether the output sequence should be isotonic/increasing (TRUE) or #' antitonic/decreasing (FALSE) -#' @param featureIndex The index of the feature if \code{featuresCol} is a vector column (default: `0`), -#' no effect otherwise +#' @param featureIndex The index of the feature if \code{featuresCol} is a vector column +#' (default: 0), no effect otherwise #' @param weightCol The weight column name. 
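A minimal isotonic regression sketch built around the parameters documented here (\code{isotonic}, \code{featureIndex}, \code{weightCol}); the toy data and equal weights are assumptions for illustration, not taken from this patch.

```r
# assumes library(SparkR) is attached and sparkR.session() has been called
train <- createDataFrame(data.frame(label   = c(1.0, 2.0, 3.0, 1.0, 6.0),
                                    feature = c(1.0, 2.0, 3.0, 4.0, 5.0),
                                    weight  = c(1.0, 1.0, 1.0, 1.0, 1.0)))

# increasing (isotonic) fit; weightCol is optional and here uses equal weights
model <- spark.isoreg(train, label ~ feature, isotonic = TRUE, weightCol = "weight")

summary(model)               # boundaries and predictions of the fitted step function
head(predict(model, train))  # adds a "prediction" column
```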
#' @return \code{spark.isoreg} returns a fitted Isotonic Regression model #' @rdname spark.isoreg @@ -647,7 +647,7 @@ setMethod("predict", signature(object = "KMeansModel"), #' @rdname spark.naiveBayes #' @aliases spark.naiveBayes,SparkDataFrame,formula-method #' @name spark.naiveBayes -#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/} +#' @seealso e1071: \url{https://cran.r-project.org/package=e1071} #' @export #' @examples #' \dontrun{ @@ -815,7 +815,7 @@ read.ml <- function(path) { #' Note that operator '.' is not supported currently. #' @return \code{spark.survreg} returns a fitted AFT survival regression model. #' @rdname spark.survreg -#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/} +#' @seealso survival: \url{https://cran.r-project.org/package=survival} #' @export #' @examples #' \dontrun{ @@ -870,10 +870,11 @@ setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula #' @param customizedStopWords stopwords that need to be removed from the given corpus. Ignore the #' parameter if libSVM-format column is used as the features column. #' @param maxVocabSize maximum vocabulary size, default 1 << 18 +#' @param ... additional argument(s) passed to the method. #' @return \code{spark.lda} returns a fitted Latent Dirichlet Allocation model #' @rdname spark.lda #' @aliases spark.lda,SparkDataFrame-method -#' @seealso topicmodels: \url{https://cran.r-project.org/web/packages/topicmodels/} +#' @seealso topicmodels: \url{https://cran.r-project.org/package=topicmodels} #' @export #' @examples #' \dontrun{ @@ -962,7 +963,7 @@ setMethod("predict", signature(object = "AFTSurvivalRegressionModel"), #' @return \code{spark.gaussianMixture} returns a fitted multivariate gaussian mixture model. #' @rdname spark.gaussianMixture #' @name spark.gaussianMixture -#' @seealso mixtools: \url{https://cran.r-project.org/web/packages/mixtools/} +#' @seealso mixtools: \url{https://cran.r-project.org/package=mixtools} #' @export #' @examples #' \dontrun{ @@ -1075,7 +1076,7 @@ setMethod("predict", signature(object = "GaussianMixtureModel"), #' @param numUserBlocks number of user blocks used to parallelize computation (> 0). #' @param numItemBlocks number of item blocks used to parallelize computation (> 0). #' @param checkpointInterval number of checkpoint intervals (>= 1) or disable checkpoint (-1). -#' +#' @param ... additional argument(s) passed to the method. #' @return \code{spark.als} returns a fitted ALS model #' @rdname spark.als #' @aliases spark.als,SparkDataFrame-method diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index f0605db1e9..4dee3245f9 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -917,19 +917,19 @@ setMethod("sampleByKey", len <- 0 # mixing because the initial seeds are close to each other - runif(10) + stats::runif(10) for (elem in part) { if (elem[[1]] %in% names(fractions)) { frac <- as.numeric(fractions[which(elem[[1]] == names(fractions))]) if (withReplacement) { - count <- rpois(1, frac) + count <- stats::rpois(1, frac) if (count > 0) { res[ (len + 1) : (len + count) ] <- rep(list(elem), count) len <- len + count } } else { - if (runif(1) < frac) { + if (stats::runif(1) < frac) { len <- len + 1 res[[len]] <- elem } diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index 8ea24d8172..dcd7198f41 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -29,9 +29,9 @@ setOldClass("jobj") #' @param col1 name of the first column. Distinct items will make the first item of each row. 
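As a usage sketch for \code{crosstab}, using the standard R \code{mtcars} data purely for illustration:

```r
# assumes library(SparkR) is attached and sparkR.session() has been called
df <- createDataFrame(mtcars)

# pair-wise frequency (contingency) table of cyl vs gear, collected as a local data.frame
ct <- crosstab(df, "cyl", "gear")
ct
# the first column is named "cyl_gear"; the remaining columns are the distinct gear
# values, and cells hold counts (zero for pairs that never occur)
```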
#' @param col2 name of the second column. Distinct items will make the column names of the output. #' @return a local R data.frame representing the contingency table. The first column of each row -#' will be the distinct values of `col1` and the column names will be the distinct values -#' of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no -#' occurrences will have zero as their counts. +#' will be the distinct values of \code{col1} and the column names will be the distinct values +#' of \code{col2}. The name of the first column will be "\code{col1}_\code{col2}". Pairs +#' that have no occurrences will have zero as their counts. #' #' @rdname crosstab #' @name crosstab @@ -116,7 +116,7 @@ setMethod("corr", #' #' @param x A SparkDataFrame. #' @param cols A vector column names to search frequent items in. -#' @param support (Optional) The minimum frequency for an item to be considered `frequent`. +#' @param support (Optional) The minimum frequency for an item to be considered \code{frequent}. #' Should be greater than 1e-4. Default support = 0.01. #' @return a local R data.frame with the frequent items in each column #' @@ -142,9 +142,9 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"), #' #' Calculates the approximate quantiles of a numerical column of a SparkDataFrame. #' The result of this algorithm has the following deterministic bound: -#' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to -#' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the -#' *exact* rank of `x` is close to (p * N). More precisely, +#' If the SparkDataFrame has N elements and if we request the quantile at probability p up to +#' error err, then the algorithm will return a sample x from the SparkDataFrame so that the +#' *exact* rank of x is close to (p * N). More precisely, #' floor((p - err) * N) <= rank(x) <= ceil((p + err) * N). #' This method implements a variation of the Greenwald-Khanna algorithm (with some speed #' optimizations). The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 -- cgit v1.2.3
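A short sketch of the error bound described for \code{approxQuantile}, using the built-in \code{faithful} data purely as an illustration:

```r
# assumes library(SparkR) is attached and sparkR.session() has been called
df <- createDataFrame(faithful)

# quartiles of "waiting" with a 1% relative error: for each returned x at probability p,
# floor((p - 0.01) * N) <= rank(x) <= ceil((p + 0.01) * N)
quartiles <- approxQuantile(df, "waiting", c(0.25, 0.5, 0.75), 0.01)
quartiles

# relativeError = 0 requests exact quantiles, at correspondingly higher cost
median_exact <- approxQuantile(df, "waiting", c(0.5), 0.0)
```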