diff options
-rw-r--r-- | R/pkg/R/generics.R | 8 | ||||
-rw-r--r-- | R/pkg/R/stats.R | 32 |
2 files changed, 17 insertions, 23 deletions
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ead403be98..43395aaa1d 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -434,19 +434,19 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") }) #' @export setGeneric("columns", function(x) {standardGeneric("columns") }) -#' @rdname statfunctions +#' @rdname cov #' @export setGeneric("cov", function(x, ...) {standardGeneric("cov") }) -#' @rdname statfunctions +#' @rdname corr #' @export setGeneric("corr", function(x, ...) {standardGeneric("corr") }) -#' @rdname statfunctions +#' @rdname cov #' @export setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") }) -#' @rdname statfunctions +#' @rdname covar_pop #' @export setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") }) diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index e92b9e3d84..e40b1773d7 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -19,9 +19,10 @@ setOldClass("jobj") -#' crosstab -#' -#' Computes a pair-wise frequency table of the given columns. Also known as a contingency +#' @title SparkDataFrame statistic functions + +#' @description +#' crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency #' table. The number of distinct values for each column should be less than 1e4. At most 1e6 #' non-zero pair frequencies will be returned. #' @@ -49,8 +50,6 @@ setMethod("crosstab", collect(dataFrame(sct)) }) -#' cov -#' #' Calculate the sample covariance of two numerical columns of a SparkDataFrame. #' #' @param x A SparkDataFrame @@ -58,7 +57,7 @@ setMethod("crosstab", #' @param col2 the name of the second column #' @return the covariance of the two columns. #' -#' @rdname statfunctions +#' @rdname cov #' @name cov #' @export #' @examples @@ -75,8 +74,6 @@ setMethod("cov", callJMethod(statFunctions, "cov", col1, col2) }) -#' corr -#' #' Calculates the correlation of two columns of a SparkDataFrame. #' Currently only supports the Pearson Correlation Coefficient. #' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics. @@ -88,7 +85,7 @@ setMethod("cov", #' only "pearson" is allowed now. #' @return The Pearson Correlation Coefficient as a Double. #' -#' @rdname statfunctions +#' @rdname corr #' @name corr #' @export #' @examples @@ -106,9 +103,8 @@ setMethod("corr", callJMethod(statFunctions, "corr", col1, col2, method) }) -#' freqItems -#' -#' Finding frequent items for columns, possibly with false positives. +#' @description +#' freqItems - Finding frequent items for columns, possibly with false positives. #' Using the frequent element count algorithm described in #' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou. #' @@ -134,10 +130,8 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"), collect(dataFrame(sct)) }) -#' approxQuantile -#' -#' Calculates the approximate quantiles of a numerical column of a SparkDataFrame. -#' +#' @description +#' approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame. #' The result of this algorithm has the following deterministic bound: #' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to #' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the @@ -174,9 +168,9 @@ setMethod("approxQuantile", as.list(probabilities), relativeError) }) -#' sampleBy -#' -#' Returns a stratified sample without replacement based on the fraction given on each stratum. +#' @description +#' sampleBy - Returns a stratified sample without replacement based on the fraction given on each +#' stratum. #' #' @param x A SparkDataFrame #' @param col column that defines strata |