author    Felix Cheung <felixcheung_m@hotmail.com>  2016-06-21 00:19:09 -0700
committer Shivaram Venkataraman <shivaram@cs.berkeley.edu>  2016-06-21 00:19:09 -0700
commit    843a1eba8ec9d5a7beac0c74b54d24cb3c41b45a (patch)
tree      97e0bee7d0e3ec4934bef50ba67ff8e0a8a1a7d5 /R
parent    09f4ceaeb0a99874f774e09d868fdf907ecf256f (diff)
[SPARK-15319][SPARKR][DOCS] Fix SparkR doc layout for corr and other DataFrame stats functions
## What changes were proposed in this pull request?

Doc-only changes; please see the screenshots.

Before: http://spark.apache.org/docs/latest/api/R/statfunctions.html
![image](https://cloud.githubusercontent.com/assets/8969467/15264110/cd458826-1924-11e6-85bd-8ee2e2e1a85f.png)

After:
![image](https://cloud.githubusercontent.com/assets/8969467/16218452/b9e89f08-3732-11e6-969d-a3a1796e7ad0.png)

(Please ignore the style differences - they are due to not having the CSS in my local copy.)

This layout is still somewhat awkward. As discussed in SPARK-15237, the better approach is to split the DataFrame stats functions onto separate pages instead of putting everything on one page. At least it is now clear which description belongs to which function.

## How was this patch tested?

Built the docs.

Author: Felix Cheung <felixcheung_m@hotmail.com>
Author: felixcheung <felixcheung_m@hotmail.com>

Closes #13109 from felixcheung/rstatdoc.
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/R/generics.R |  8
-rw-r--r--  R/pkg/R/stats.R    | 32
2 files changed, 17 insertions(+), 23 deletions(-)
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ead403be98..43395aaa1d 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -434,19 +434,19 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
#' @export
setGeneric("columns", function(x) {standardGeneric("columns") })
-#' @rdname statfunctions
+#' @rdname cov
#' @export
setGeneric("cov", function(x, ...) {standardGeneric("cov") })
-#' @rdname statfunctions
+#' @rdname corr
#' @export
setGeneric("corr", function(x, ...) {standardGeneric("corr") })
-#' @rdname statfunctions
+#' @rdname cov
#' @export
setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
-#' @rdname statfunctions
+#' @rdname covar_pop
#' @export
setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index e92b9e3d84..e40b1773d7 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -19,9 +19,10 @@
setOldClass("jobj")
-#' crosstab
-#'
-#' Computes a pair-wise frequency table of the given columns. Also known as a contingency
+#' @title SparkDataFrame statistic functions
+
+#' @description
+#' crosstab - Computes a pair-wise frequency table of the given columns. Also known as a contingency
#' table. The number of distinct values for each column should be less than 1e4. At most 1e6
#' non-zero pair frequencies will be returned.
#'
@@ -49,8 +50,6 @@ setMethod("crosstab",
collect(dataFrame(sct))
})
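For context, a hedged usage sketch of `crosstab` (assumes a running SparkR session in the Spark 2.x style; `mtcars` is illustrative data, not part of this commit):

```r
library(SparkR)
sparkR.session()  # Spark 2.x entry point; 1.x releases used sparkR.init()

df <- createDataFrame(data.frame(cyl = mtcars$cyl, gear = mtcars$gear))
# Pair-wise frequency (contingency) table of the two columns; each column
# should have fewer than 1e4 distinct values, per the docs above.
ct <- crosstab(df, "cyl", "gear")
head(ct)  # crosstab collects to a local R data.frame
```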
-#' cov
-#'
#' Calculate the sample covariance of two numerical columns of a SparkDataFrame.
#'
#' @param x A SparkDataFrame
@@ -58,7 +57,7 @@ setMethod("crosstab",
#' @param col2 the name of the second column
#' @return the covariance of the two columns.
#'
-#' @rdname statfunctions
+#' @rdname cov
#' @name cov
#' @export
#' @examples
@@ -75,8 +74,6 @@ setMethod("cov",
callJMethod(statFunctions, "cov", col1, col2)
})
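A matching sketch for the `cov` method documented above (same assumed session and illustrative data):

```r
df <- createDataFrame(mtcars)
# Sample covariance of two numeric columns, returned as a plain double.
cov(df, "mpg", "hp")
```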
-#' corr
-#'
#' Calculates the correlation of two columns of a SparkDataFrame.
#' Currently only supports the Pearson Correlation Coefficient.
#' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics.
@@ -88,7 +85,7 @@ setMethod("cov",
#' only "pearson" is allowed now.
#' @return The Pearson Correlation Coefficient as a Double.
#'
-#' @rdname statfunctions
+#' @rdname corr
#' @name corr
#' @export
#' @examples
@@ -106,9 +103,8 @@ setMethod("corr",
callJMethod(statFunctions, "corr", col1, col2, method)
})
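And for `corr`, which as the docs note currently supports only the Pearson coefficient:

```r
# Pearson correlation of two numeric columns, returned as a double.
corr(df, "mpg", "hp")                      # method defaults to "pearson"
corr(df, "mpg", "hp", method = "pearson")  # equivalent, explicit form
```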
-#' freqItems
-#'
-#' Finding frequent items for columns, possibly with false positives.
+#' @description
+#' freqItems - Finding frequent items for columns, possibly with false positives.
#' Using the frequent element count algorithm described in
#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
#'
@@ -134,10 +130,8 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
collect(dataFrame(sct))
})
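A usage sketch for `freqItems` under the same assumptions; `support` is the minimum fraction of rows an item must appear in to be reported:

```r
# Items occurring in at least 25% of rows, possibly with false positives.
# Returns a local one-row data.frame with one array column per input column.
freqItems(df, c("cyl", "gear"), support = 0.25)
```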
-#' approxQuantile
-#'
-#' Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
-#'
+#' @description
+#' approxQuantile - Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
#' The result of this algorithm has the following deterministic bound:
#' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to
#' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the
@@ -174,9 +168,9 @@ setMethod("approxQuantile",
as.list(probabilities), relativeError)
})
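A sketch of `approxQuantile` with the bound above in mind: at `relativeError = 0.01` and `p = 0.5`, the returned sample's rank is guaranteed to fall between floor((0.5 - 0.01) * N) and ceiling((0.5 + 0.01) * N):

```r
# Approximate 25th, 50th, and 75th percentiles of a numeric column.
# relativeError trades accuracy for speed; 0 requests exact quantiles.
approxQuantile(df, "mpg", probabilities = c(0.25, 0.5, 0.75),
               relativeError = 0.01)
```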
-#' sampleBy
-#'
-#' Returns a stratified sample without replacement based on the fraction given on each stratum.
+#' @description
+#' sampleBy - Returns a stratified sample without replacement based on the fraction given on each
+#' stratum.
#'
#' @param x A SparkDataFrame
#' @param col column that defines strata
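Finally, a hedged sketch of `sampleBy`; `fractions` is a named list mapping each stratum value to its sampling fraction, and strata left out of the list are sampled at fraction zero (a string strata column is used here so the list names match the column values):

```r
# Stratified sample without replacement: keep roughly 20% of the "4"
# stratum, 50% of the "6" stratum, and 30% of the "8" stratum.
strata_df <- createDataFrame(data.frame(group = as.character(mtcars$cyl),
                                        mpg = mtcars$mpg))
sampleBy(strata_df, "group",
         fractions = list("4" = 0.2, "6" = 0.5, "8" = 0.3), seed = 42)
```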