From e8f90d9dda3f87fef01c683462eac67aad750f60 Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Thu, 8 Oct 2015 09:53:44 -0700 Subject: [SPARK-10836] [SPARKR] Added sort(x, decreasing, col, ...) method to DataFrame. The sort function can be used as an alternative to arrange(...). As arguments it accepts x - a DataFrame, decreasing - TRUE/FALSE, a list of orderings for columns, and the list of columns, represented as string names. For example: sort(df, TRUE, "col1","col2","col3","col5") # for example, if we want to sort some of the columns in the same order sort(df, decreasing=TRUE, "col1") sort(df, decreasing=c(TRUE,FALSE), "col1","col2") Author: Narine Kokhlikyan Closes #8920 from NarineK/sparkrsort. --- R/pkg/R/DataFrame.R | 47 +++++++++++++++++++++++++++++++++------- R/pkg/inst/tests/test_sparkSQL.R | 11 +++++++++- 2 files changed, 49 insertions(+), 9 deletions(-) (limited to 'R/pkg') diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 85db3a5ed3..1b9137e6c7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1298,8 +1298,10 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' Sort a DataFrame by the specified column(s). #' #' @param x A DataFrame to be sorted. -#' @param col Either a Column object or character vector indicating the field to sort on +#' @param col A character or Column object vector indicating the fields to sort on #' @param ... Additional sorting fields +#' @param decreasing A logical argument indicating sorting order for columns when +#' a character vector is specified for col #' @return A DataFrame where all elements are sorted. 
#' @rdname arrange #' @name arrange @@ -1312,23 +1314,52 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' path <- "path/to/file.json" #' df <- jsonFile(sqlContext, path) #' arrange(df, df$col1) -#' arrange(df, "col1") #' arrange(df, asc(df$col1), desc(abs(df$col2))) +#' arrange(df, "col1", decreasing = TRUE) +#' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE)) #' } setMethod("arrange", - signature(x = "DataFrame", col = "characterOrColumn"), + signature(x = "DataFrame", col = "Column"), function(x, col, ...) { - if (class(col) == "character") { - sdf <- callJMethod(x@sdf, "sort", col, list(...)) - } else if (class(col) == "Column") { jcols <- lapply(list(col, ...), function(c) { c@jc }) - sdf <- callJMethod(x@sdf, "sort", jcols) - } + + sdf <- callJMethod(x@sdf, "sort", jcols) dataFrame(sdf) }) +#' @rdname arrange +#' @export +setMethod("arrange", + signature(x = "DataFrame", col = "character"), + function(x, col, ..., decreasing = FALSE) { + + # all sorting columns + by <- list(col, ...) 
+ + if (length(decreasing) == 1) { + # in case only 1 boolean argument - decreasing value is specified, + # it will be used for all columns + decreasing <- rep(decreasing, length(by)) + } else if (length(decreasing) != length(by)) { + stop("Arguments 'col' and 'decreasing' must have the same length") + } + + # builds a list of columns of type Column + # example: [[1]] Column Species ASC + # [[2]] Column Petal_Length DESC + jcols <- lapply(seq_len(length(decreasing)), function(i){ + if (decreasing[[i]]) { + desc(getColumn(x, by[[i]])) + } else { + asc(getColumn(x, by[[i]])) + } + }) + + do.call("arrange", c(x, jcols)) + }) + #' @rdname arrange #' @name orderby setMethod("orderBy", diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index bcf52b8fa7..e85de25070 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -989,7 +989,7 @@ test_that("arrange() and orderBy() on a DataFrame", { sorted <- arrange(df, df$age) expect_equal(collect(sorted)[1,2], "Michael") - sorted2 <- arrange(df, "name") + sorted2 <- arrange(df, "name", decreasing = FALSE) expect_equal(collect(sorted2)[2,"age"], 19) sorted3 <- orderBy(df, asc(df$age)) @@ -999,6 +999,15 @@ test_that("arrange() and orderBy() on a DataFrame", { sorted4 <- orderBy(df, desc(df$name)) expect_equal(first(sorted4)$name, "Michael") expect_equal(collect(sorted4)[3,"name"], "Andy") + + sorted5 <- arrange(df, "age", "name", decreasing = TRUE) + expect_equal(collect(sorted5)[1,2], "Andy") + + sorted6 <- arrange(df, "age","name", decreasing = c(T, F)) + expect_equal(collect(sorted6)[1,2], "Andy") + + sorted7 <- arrange(df, "name", decreasing = FALSE) + expect_equal(collect(sorted7)[2,"age"], 19) }) test_that("filter() on a DataFrame", { -- cgit v1.2.3