aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
author Yanbo Liang <ybliang8@gmail.com> 2015-11-27 11:48:01 -0800
committer Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-11-27 11:48:01 -0800
commit ba02f6cb5a40511cefa511d410be93c035d43f23 (patch)
tree ee02c7cff65e95b885d21d5b13c27a4ecd2623ea /R
parent a374e20b5492c775f20d32e8fbddadbd8098a655 (diff)
download spark-ba02f6cb5a40511cefa511d410be93c035d43f23.tar.gz
spark-ba02f6cb5a40511cefa511d410be93c035d43f23.tar.bz2
spark-ba02f6cb5a40511cefa511d410be93c035d43f23.zip
[SPARK-12025][SPARKR] Rename some window rank function names for SparkR
Change ```cumeDist -> cume_dist, denseRank -> dense_rank, percentRank -> percent_rank, rowNumber -> row_number``` on the SparkR side. There are two reasons to make this change: * We should follow the [naming convention rule of R](http://www.inside-r.org/node/230645). * Spark DataFrame has deprecated the old camelCase names (such as ```cumeDist```) and will remove them in Spark 2.0. It is better to fix this before the 1.6 release; otherwise we would have to make a breaking API change later. cc shivaram sun-rui Author: Yanbo Liang <ybliang8@gmail.com> Closes #10016 from yanboliang/SPARK-12025.
Diffstat (limited to 'R')
-rw-r--r-- R/pkg/NAMESPACE 8
-rw-r--r-- R/pkg/R/functions.R 54
-rw-r--r-- R/pkg/R/generics.R 16
-rw-r--r-- R/pkg/inst/tests/test_sparkSQL.R 4
4 files changed, 41 insertions, 41 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 260c9edce6..5d04dd6aca 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -123,14 +123,14 @@ exportMethods("%in%",
"count",
"countDistinct",
"crc32",
- "cumeDist",
+ "cume_dist",
"date_add",
"date_format",
"date_sub",
"datediff",
"dayofmonth",
"dayofyear",
- "denseRank",
+ "dense_rank",
"desc",
"endsWith",
"exp",
@@ -188,7 +188,7 @@ exportMethods("%in%",
"next_day",
"ntile",
"otherwise",
- "percentRank",
+ "percent_rank",
"pmod",
"quarter",
"rand",
@@ -200,7 +200,7 @@ exportMethods("%in%",
"rint",
"rlike",
"round",
- "rowNumber",
+ "row_number",
"rpad",
"rtrim",
"second",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 25a1f22101..e98e7a0117 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -2146,47 +2146,47 @@ setMethod("ifelse",
###################### Window functions######################
-#' cumeDist
+#' cume_dist
#'
#' Window function: returns the cumulative distribution of values within a window partition,
#' i.e. the fraction of rows that are below the current row.
#'
#' N = total number of rows in the partition
-#' cumeDist(x) = number of values before (and including) x / N
+#' cume_dist(x) = number of values before (and including) x / N
#'
#' This is equivalent to the CUME_DIST function in SQL.
#'
-#' @rdname cumeDist
-#' @name cumeDist
+#' @rdname cume_dist
+#' @name cume_dist
#' @family window_funcs
#' @export
-#' @examples \dontrun{cumeDist()}
-setMethod("cumeDist",
+#' @examples \dontrun{cume_dist()}
+setMethod("cume_dist",
signature(x = "missing"),
function() {
- jc <- callJStatic("org.apache.spark.sql.functions", "cumeDist")
+ jc <- callJStatic("org.apache.spark.sql.functions", "cume_dist")
column(jc)
})
-#' denseRank
+#' dense_rank
#'
#' Window function: returns the rank of rows within a window partition, without any gaps.
-#' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
-#' sequence when there are ties. That is, if you were ranking a competition using denseRank
+#' The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
+#' sequence when there are ties. That is, if you were ranking a competition using dense_rank
#' and had three people tie for second place, you would say that all three were in second
#' place and that the next person came in third.
#'
#' This is equivalent to the DENSE_RANK function in SQL.
#'
-#' @rdname denseRank
-#' @name denseRank
+#' @rdname dense_rank
+#' @name dense_rank
#' @family window_funcs
#' @export
-#' @examples \dontrun{denseRank()}
-setMethod("denseRank",
+#' @examples \dontrun{dense_rank()}
+setMethod("dense_rank",
signature(x = "missing"),
function() {
- jc <- callJStatic("org.apache.spark.sql.functions", "denseRank")
+ jc <- callJStatic("org.apache.spark.sql.functions", "dense_rank")
column(jc)
})
@@ -2264,7 +2264,7 @@ setMethod("ntile",
column(jc)
})
-#' percentRank
+#' percent_rank
#'
#' Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
#'
@@ -2274,15 +2274,15 @@ setMethod("ntile",
#'
#' This is equivalent to the PERCENT_RANK function in SQL.
#'
-#' @rdname percentRank
-#' @name percentRank
+#' @rdname percent_rank
+#' @name percent_rank
#' @family window_funcs
#' @export
-#' @examples \dontrun{percentRank()}
-setMethod("percentRank",
+#' @examples \dontrun{percent_rank()}
+setMethod("percent_rank",
signature(x = "missing"),
function() {
- jc <- callJStatic("org.apache.spark.sql.functions", "percentRank")
+ jc <- callJStatic("org.apache.spark.sql.functions", "percent_rank")
column(jc)
})
@@ -2316,21 +2316,21 @@ setMethod("rank",
base::rank(x, ...)
})
-#' rowNumber
+#' row_number
#'
#' Window function: returns a sequential number starting at 1 within a window partition.
#'
#' This is equivalent to the ROW_NUMBER function in SQL.
#'
-#' @rdname rowNumber
-#' @name rowNumber
+#' @rdname row_number
+#' @name row_number
#' @family window_funcs
#' @export
-#' @examples \dontrun{rowNumber()}
-setMethod("rowNumber",
+#' @examples \dontrun{row_number()}
+setMethod("row_number",
signature(x = "missing"),
function() {
- jc <- callJStatic("org.apache.spark.sql.functions", "rowNumber")
+ jc <- callJStatic("org.apache.spark.sql.functions", "row_number")
column(jc)
})
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 1b3f10ea04..0c305441e0 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -700,9 +700,9 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
#' @export
setGeneric("crc32", function(x) { standardGeneric("crc32") })
-#' @rdname cumeDist
+#' @rdname cume_dist
#' @export
-setGeneric("cumeDist", function(x) { standardGeneric("cumeDist") })
+setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") })
#' @rdname datediff
#' @export
@@ -728,9 +728,9 @@ setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
#' @export
setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
-#' @rdname denseRank
+#' @rdname dense_rank
#' @export
-setGeneric("denseRank", function(x) { standardGeneric("denseRank") })
+setGeneric("dense_rank", function(x) { standardGeneric("dense_rank") })
#' @rdname explode
#' @export
@@ -872,9 +872,9 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") })
#' @export
setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
-#' @rdname percentRank
+#' @rdname percent_rank
#' @export
-setGeneric("percentRank", function(x) { standardGeneric("percentRank") })
+setGeneric("percent_rank", function(x) { standardGeneric("percent_rank") })
#' @rdname pmod
#' @export
@@ -913,9 +913,9 @@ setGeneric("reverse", function(x) { standardGeneric("reverse") })
#' @export
setGeneric("rint", function(x, ...) { standardGeneric("rint") })
-#' @rdname rowNumber
+#' @rdname row_number
#' @export
-setGeneric("rowNumber", function(x) { standardGeneric("rowNumber") })
+setGeneric("row_number", function(x) { standardGeneric("row_number") })
#' @rdname rpad
#' @export
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 3f4f319fe7..0fbe065826 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -861,8 +861,8 @@ test_that("column functions", {
c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
c12 <- variance(c)
c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
- c14 <- cumeDist() + ntile(1)
- c15 <- denseRank() + percentRank() + rank() + rowNumber()
+ c14 <- cume_dist() + ntile(1)
+ c15 <- dense_rank() + percent_rank() + rank() + row_number()
# Test if base::rank() is exposed
expect_equal(class(rank())[[1]], "Column")