[SPARK-10904] [SPARKR] Fix to support `select(df, c("col1", "col2"))`

The fix is to coerce `c("a", "b")` into a list so that it can be serialized when calling the JVM. Author: felixcheung <felixcheung_m@hotmail.com> Closes #8961 from felixcheung/rselect.
author: felixcheung <felixcheung_m@hotmail.com> 2015-10-03 22:42:36 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-10-03 22:42:36 -0700
commit: 721e8b5f35b230ff426c1757a9bdc1399fb19afa (patch)
tree: e56ddb71163299a662b193425b2f67a8d154220b /R
parent: ae6570ec2bf937e28bd1e7bada7813ac56a7b79d (diff)
download: spark-721e8b5f35b230ff426c1757a9bdc1399fb19afa.tar.gz
spark-721e8b5f35b230ff426c1757a9bdc1399fb19afa.tar.bz2
spark-721e8b5f35b230ff426c1757a9bdc1399fb19afa.zip
2 files changed, 21 insertions, 6 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 65e368c47d..14aea923fc 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1075,12 +1075,20 @@ setMethod("subset", signature(x = "DataFrame"),
 #'   select(df, c("col1", "col2"))
 #'   select(df, list(df$name, df$age + 1))
 #'   # Similar to R data frames columns can also be selected using `$`
-#'   df$age
+#'   df[,df$age]
 #' }
 setMethod("select", signature(x = "DataFrame", col = "character"),
           function(x, col, ...) {
-            sdf <- callJMethod(x@sdf, "select", col, list(...))
-            dataFrame(sdf)
+            if (length(col) > 1) {
+              if (length(list(...)) > 0) {
+                stop("To select multiple columns, use a character vector or list for col")
+              }
+
+              select(x, as.list(col))
+            } else {
+              sdf <- callJMethod(x@sdf, "select", col, list(...))
+              dataFrame(sdf)
+            }
           })
 
 #' @rdname select
@@ -1853,13 +1861,13 @@ setMethod("crosstab",
 #' This function downloads the contents of a DataFrame into an R's data.frame.
 #' Since data.frames are held in memory, ensure that you have enough memory
 #' in your system to accommodate the contents.
-#' 
+#'
 #' @title Download data from a DataFrame into a data.frame
 #' @param x a DataFrame
 #' @return a data.frame
 #' @rdname as.data.frame
 #' @examples \dontrun{
-#' 
+#'
 #' irisDF <- createDataFrame(sqlContext, iris)
 #' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ])
 #' }
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 8f85eecbc4..faf42b7182 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -673,6 +673,13 @@ test_that("select with column", {
   expect_equal(columns(df3), c("x"))
   expect_equal(count(df3), 3)
   expect_equal(collect(select(df3, "x"))[[1, 1]], "x")
+
+  df4 <- select(df, c("name", "age"))
+  expect_equal(columns(df4), c("name", "age"))
+  expect_equal(count(df4), 3)
+
+  expect_error(select(df, c("name", "age"), "name"),
+                "To select multiple columns, use a character vector or list for col")
 })
 
 test_that("subsetting", {
@@ -1336,4 +1343,4 @@ test_that("Method as.data.frame as a synonym for collect()", {
 
 unlink(parquetPath)
 unlink(jsonPath)
-unlink(jsonPathNa)
-\ No newline at end of file
+unlink(jsonPathNa)
author	felixcheung <felixcheung_m@hotmail.com>	2015-10-03 22:42:36 -0700
committer	Shivaram Venkataraman <shivaram@cs.berkeley.edu>	2015-10-03 22:42:36 -0700
commit	721e8b5f35b230ff426c1757a9bdc1399fb19afa (patch)
tree	e56ddb71163299a662b193425b2f67a8d154220b /R
parent	ae6570ec2bf937e28bd1e7bada7813ac56a7b79d (diff)
download	spark-721e8b5f35b230ff426c1757a9bdc1399fb19afa.tar.gz spark-721e8b5f35b230ff426c1757a9bdc1399fb19afa.tar.bz2 spark-721e8b5f35b230ff426c1757a9bdc1399fb19afa.zip