about | summary | refs | log | tree | commit | diff
path: root/R
diff options
context:
space:
mode:
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/R/generics.R  8
-rw-r--r--  R/pkg/R/stats.R  32
2 files changed, 17 insertions, 23 deletions
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ead403be98..43395aaa1d 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -434,19 +434,19 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
#' @export
setGeneric("columns", function(x) {standardGeneric("columns") })
-#' @rdname statfunctions
+#' @rdname cov
#' @export
setGeneric("cov", function(x, ...) {standardGeneric("cov") })
-#' @rdname statfunctions
+#' @rdname corr
#' @export
setGeneric("corr", function(x, ...) {standardGeneric("corr") })
-#' @rdname statfunctions
+#' @rdname cov
#' @export
setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
-#' @rdname statfunctions
+#' @rdname covar_pop
#' @export
setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index e92b9e3d84..e40b1773d7 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -19,9 +19,10 @@
setOldClass("jobj")
-#' crosstab
-#'
-#' Computes a pair-wise frequency table of the given columns. Also known as a contingency
+#' @title SparkDataFrame statistic functions
+
+#' @description
+#' crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency
#' table. The number of distinct values for each column should be less than 1e4. At most 1e6
#' non-zero pair frequencies will be returned.
#'
@@ -49,8 +50,6 @@ setMethod("crosstab",
collect(dataFrame(sct))
})
-#' cov
-#'
#' Calculate the sample covariance of two numerical columns of a SparkDataFrame.
#'
#' @param x A SparkDataFrame
@@ -58,7 +57,7 @@ setMethod("crosstab",
#' @param col2 the name of the second column
#' @return the covariance of the two columns.
#'
-#' @rdname statfunctions
+#' @rdname cov
#' @name cov
#' @export
#' @examples
@@ -75,8 +74,6 @@ setMethod("cov",
callJMethod(statFunctions, "cov", col1, col2)
})
-#' corr
-#'
#' Calculates the correlation of two columns of a SparkDataFrame.
#' Currently only supports the Pearson Correlation Coefficient.
#' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics.
@@ -88,7 +85,7 @@ setMethod("cov",
#' only "pearson" is allowed now.
#' @return The Pearson Correlation Coefficient as a Double.
#'
-#' @rdname statfunctions
+#' @rdname corr
#' @name corr
#' @export
#' @examples
@@ -106,9 +103,8 @@ setMethod("corr",
callJMethod(statFunctions, "corr", col1, col2, method)
})
-#' freqItems
-#'
-#' Finding frequent items for columns, possibly with false positives.
+#' @description
+#' freqItems - Finding frequent items for columns, possibly with false positives.
#' Using the frequent element count algorithm described in
#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
#'
@@ -134,10 +130,8 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
collect(dataFrame(sct))
})
-#' approxQuantile
-#'
-#' Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
-#'
+#' @description
+#' approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
#' The result of this algorithm has the following deterministic bound:
#' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to
#' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the
@@ -174,9 +168,9 @@ setMethod("approxQuantile",
as.list(probabilities), relativeError)
})
-#' sampleBy
-#'
-#' Returns a stratified sample without replacement based on the fraction given on each stratum.
+#' @description
+#' sampleBy - Returns a stratified sample without replacement based on the fraction given on each
+#' stratum.
#'
#' @param x A SparkDataFrame
#' @param col column that defines strata