[SPARK-10836] [SPARKR] Added sort(x, decreasing, col, ... ) method to DataFrame

the sort function can be used as an alternative to arrange(... ). As arguments it accepts x - dataframe, decreasing - TRUE/FALSE, a list of orderings for columns and the list of columns, represented as string names for example: sort(df, TRUE, "col1","col2","col3","col5") # for example, if we want to sort some of the columns in the same order sort(df, decreasing=TRUE, "col1") sort(df, decreasing=c(TRUE,FALSE), "col1","col2") Author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com> Closes #8920 from NarineK/sparkrsort.
author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com> 2015-10-08 09:53:44 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-10-08 09:53:44 -0700
commit: e8f90d9dda3f87fef01c683462eac67aad750f60 (patch)
tree: 2c2d8d5739ef8ea0fced3639c0dfc0c9e938faf0 /R/pkg
parent: 56a9692fc06077e31b37c00957e8011235f4e4eb (diff)
download: spark-e8f90d9dda3f87fef01c683462eac67aad750f60.tar.gz
spark-e8f90d9dda3f87fef01c683462eac67aad750f60.tar.bz2
spark-e8f90d9dda3f87fef01c683462eac67aad750f60.zip
2 files changed, 49 insertions, 9 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 85db3a5ed3..1b9137e6c7 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1298,8 +1298,10 @@ setClassUnion("characterOrColumn", c("character", "Column"))
 #' Sort a DataFrame by the specified column(s).
 #'
 #' @param x A DataFrame to be sorted.
-#' @param col Either a Column object or character vector indicating the field to sort on
+#' @param col A character or Column object vector indicating the fields to sort on
 #' @param ... Additional sorting fields
+#' @param decreasing A logical argument indicating sorting order for columns when
+#'                   a character vector is specified for col
 #' @return A DataFrame where all elements are sorted.
 #' @rdname arrange
 #' @name arrange
@@ -1312,24 +1314,53 @@ setClassUnion("characterOrColumn", c("character", "Column"))
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlContext, path)
 #' arrange(df, df$col1)
-#' arrange(df, "col1")
 #' arrange(df, asc(df$col1), desc(abs(df$col2)))
+#' arrange(df, "col1", decreasing = TRUE)
+#' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE))
 #' }
 setMethod("arrange",
-          signature(x = "DataFrame", col = "characterOrColumn"),
+          signature(x = "DataFrame", col = "Column"),
           function(x, col, ...) {
-            if (class(col) == "character") {
-              sdf <- callJMethod(x@sdf, "sort", col, list(...))
-            } else if (class(col) == "Column") {
               jcols <- lapply(list(col, ...), function(c) {
                 c@jc
               })
-              sdf <- callJMethod(x@sdf, "sort", jcols)
-            }
+
+            sdf <- callJMethod(x@sdf, "sort", jcols)
             dataFrame(sdf)
           })
 
 #' @rdname arrange
+#' @export
+setMethod("arrange",
+          signature(x = "DataFrame", col = "character"),
+          function(x, col, ..., decreasing = FALSE) {
+
+            # all sorting columns
+            by <- list(col, ...)
+
+            if (length(decreasing) == 1) {
+              # in case only 1 boolean argument - decreasing value is specified,
+              # it will be used for all columns
+              decreasing <- rep(decreasing, length(by))
+            } else if (length(decreasing) != length(by)) {
+              stop("Arguments 'col' and 'decreasing' must have the same length")
+            }
+
+            # builds a list of columns of type Column
+            # example: [[1]] Column Species ASC
+            #          [[2]] Column Petal_Length DESC
+            jcols <- lapply(seq_len(length(decreasing)), function(i){
+              if (decreasing[[i]]) {
+                desc(getColumn(x, by[[i]]))
+              } else {
+                asc(getColumn(x, by[[i]]))
+              }
+            })
+
+            do.call("arrange", c(x, jcols))
+          })
+
+#' @rdname arrange
 #' @name orderby
 setMethod("orderBy",
           signature(x = "DataFrame", col = "characterOrColumn"),
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index bcf52b8fa7..e85de25070 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -989,7 +989,7 @@ test_that("arrange() and orderBy() on a DataFrame", {
   sorted <- arrange(df, df$age)
   expect_equal(collect(sorted)[1,2], "Michael")
 
-  sorted2 <- arrange(df, "name")
+  sorted2 <- arrange(df, "name", decreasing = FALSE)
   expect_equal(collect(sorted2)[2,"age"], 19)
 
   sorted3 <- orderBy(df, asc(df$age))
@@ -999,6 +999,15 @@ test_that("arrange() and orderBy() on a DataFrame", {
   sorted4 <- orderBy(df, desc(df$name))
   expect_equal(first(sorted4)$name, "Michael")
   expect_equal(collect(sorted4)[3,"name"], "Andy")
+
+  sorted5 <- arrange(df, "age", "name", decreasing = TRUE)
+  expect_equal(collect(sorted5)[1,2], "Andy")
+
+  sorted6 <- arrange(df, "age","name", decreasing = c(T, F))
+  expect_equal(collect(sorted6)[1,2], "Andy")
+
+  sorted7 <- arrange(df, "name", decreasing = FALSE)
+  expect_equal(collect(sorted7)[2,"age"], 19)
 })
 
 test_that("filter() on a DataFrame", {
author	Narine Kokhlikyan <narine.kokhlikyan@gmail.com>	2015-10-08 09:53:44 -0700
committer	Shivaram Venkataraman <shivaram@cs.berkeley.edu>	2015-10-08 09:53:44 -0700
commit	e8f90d9dda3f87fef01c683462eac67aad750f60 (patch)
tree	2c2d8d5739ef8ea0fced3639c0dfc0c9e938faf0 /R/pkg
parent	56a9692fc06077e31b37c00957e8011235f4e4eb (diff)
download	spark-e8f90d9dda3f87fef01c683462eac67aad750f60.tar.gz spark-e8f90d9dda3f87fef01c683462eac67aad750f60.tar.bz2 spark-e8f90d9dda3f87fef01c683462eac67aad750f60.zip