aboutsummaryrefslogtreecommitdiff
path: root/R/pkg
diff options
context:
space:
mode:
author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com> 2015-10-08 09:53:44 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-10-08 09:53:44 -0700
commit: e8f90d9dda3f87fef01c683462eac67aad750f60 (patch)
tree: 2c2d8d5739ef8ea0fced3639c0dfc0c9e938faf0 /R/pkg
parent: 56a9692fc06077e31b37c00957e8011235f4e4eb (diff)
download: spark-e8f90d9dda3f87fef01c683462eac67aad750f60.tar.gz
spark-e8f90d9dda3f87fef01c683462eac67aad750f60.tar.bz2
spark-e8f90d9dda3f87fef01c683462eac67aad750f60.zip
[SPARK-10836] [SPARKR] Added sort(x, decreasing, col, ... ) method to DataFrame
The sort function can be used as an alternative to arrange(...). As arguments it accepts: x - a DataFrame; decreasing - TRUE/FALSE, or a list of orderings for the columns; and the list of columns, represented as string names. For example: sort(df, TRUE, "col1", "col2", "col3", "col5"). As further examples, if we want to sort some of the columns in the same order: sort(df, decreasing=TRUE, "col1"); or with a separate ordering for each column: sort(df, decreasing=c(TRUE,FALSE), "col1","col2"). Author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com>. Closes #8920 from NarineK/sparkrsort.
Diffstat (limited to 'R/pkg')
-rw-r--r-- R/pkg/R/DataFrame.R 47
-rw-r--r-- R/pkg/inst/tests/test_sparkSQL.R 11
2 files changed, 49 insertions, 9 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 85db3a5ed3..1b9137e6c7 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1298,8 +1298,10 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' Sort a DataFrame by the specified column(s).
#'
#' @param x A DataFrame to be sorted.
-#' @param col Either a Column object or character vector indicating the field to sort on
+#' @param col A character or Column object vector indicating the fields to sort on
#' @param ... Additional sorting fields
+#' @param decreasing A logical argument indicating sorting order for columns when
+#' a character vector is specified for col
#' @return A DataFrame where all elements are sorted.
#' @rdname arrange
#' @name arrange
@@ -1312,24 +1314,53 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' arrange(df, df$col1)
-#' arrange(df, "col1")
#' arrange(df, asc(df$col1), desc(abs(df$col2)))
+#' arrange(df, "col1", decreasing = TRUE)
+#' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE))
#' }
setMethod("arrange",
- signature(x = "DataFrame", col = "characterOrColumn"),
+ signature(x = "DataFrame", col = "Column"),
function(x, col, ...) {
- if (class(col) == "character") {
- sdf <- callJMethod(x@sdf, "sort", col, list(...))
- } else if (class(col) == "Column") {
jcols <- lapply(list(col, ...), function(c) {
c@jc
})
- sdf <- callJMethod(x@sdf, "sort", jcols)
- }
+
+ sdf <- callJMethod(x@sdf, "sort", jcols)
dataFrame(sdf)
})
#' @rdname arrange
+#' @export
+setMethod("arrange",
+ signature(x = "DataFrame", col = "character"),
+ function(x, col, ..., decreasing = FALSE) {
+
+ # all sorting columns
+ by <- list(col, ...)
+
+ if (length(decreasing) == 1) {
+ # in case only 1 boolean argument - decreasing value is specified,
+ # it will be used for all columns
+ decreasing <- rep(decreasing, length(by))
+ } else if (length(decreasing) != length(by)) {
+ stop("Arguments 'col' and 'decreasing' must have the same length")
+ }
+
+ # builds a list of columns of type Column
+ # example: [[1]] Column Species ASC
+ # [[2]] Column Petal_Length DESC
+ jcols <- lapply(seq_len(length(decreasing)), function(i){
+ if (decreasing[[i]]) {
+ desc(getColumn(x, by[[i]]))
+ } else {
+ asc(getColumn(x, by[[i]]))
+ }
+ })
+
+ do.call("arrange", c(x, jcols))
+ })
+
+#' @rdname arrange
#' @name orderby
setMethod("orderBy",
signature(x = "DataFrame", col = "characterOrColumn"),
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index bcf52b8fa7..e85de25070 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -989,7 +989,7 @@ test_that("arrange() and orderBy() on a DataFrame", {
sorted <- arrange(df, df$age)
expect_equal(collect(sorted)[1,2], "Michael")
- sorted2 <- arrange(df, "name")
+ sorted2 <- arrange(df, "name", decreasing = FALSE)
expect_equal(collect(sorted2)[2,"age"], 19)
sorted3 <- orderBy(df, asc(df$age))
@@ -999,6 +999,15 @@ test_that("arrange() and orderBy() on a DataFrame", {
sorted4 <- orderBy(df, desc(df$name))
expect_equal(first(sorted4)$name, "Michael")
expect_equal(collect(sorted4)[3,"name"], "Andy")
+
+ sorted5 <- arrange(df, "age", "name", decreasing = TRUE)
+ expect_equal(collect(sorted5)[1,2], "Andy")
+
+ sorted6 <- arrange(df, "age","name", decreasing = c(T, F))
+ expect_equal(collect(sorted6)[1,2], "Andy")
+
+ sorted7 <- arrange(df, "name", decreasing = FALSE)
+ expect_equal(collect(sorted7)[2,"age"], 19)
})
test_that("filter() on a DataFrame", {