From f7f28ee7a513c262d52cf433d25fbf06df9bd1f1 Mon Sep 17 00:00:00 2001 From: Adrian Zhuang Date: Tue, 13 Oct 2015 10:21:07 -0700 Subject: [SPARK-10913] [SPARKR] attach() function support Bring the change code up to date. Author: Adrian Zhuang Author: adrian555 Closes #9031 from adrian555/attach2. --- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 30 ++++++++++++++++++++++++++++++ R/pkg/R/generics.R | 4 ++++ R/pkg/inst/tests/test_sparkSQL.R | 20 ++++++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 41986a5e7a..ed9cd94e03 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -23,6 +23,7 @@ export("setJobGroup", exportClasses("DataFrame") exportMethods("arrange", + "attach", "cache", "collect", "columns", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 1b9137e6c7..e0ce056243 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1881,3 +1881,33 @@ setMethod("as.data.frame", } collect(x) }) + +#' The specified DataFrame is attached to the R search path. This means that +#' the DataFrame is searched by R when evaluating a variable, so columns in +#' the DataFrame can be accessed by simply giving their names. +#' +#' @rdname attach +#' @title Attach DataFrame to R search path +#' @param what (DataFrame) The DataFrame to attach +#' @param pos (integer) Specify position in search() where to attach. +#' @param name (character) Name to use for the attached DataFrame. Names +#' starting with package: are reserved for library. 
+#' @param warn.conflicts (logical) If TRUE, warnings are printed about conflicts +#' from attaching the database, unless that DataFrame contains an object named \code{.conflicts.OK}. +#' @examples +#' \dontrun{ +#' attach(irisDf) +#' summary(Sepal_Width) +#' } +#' @seealso \link{detach} +setMethod("attach", + signature(what = "DataFrame"), + function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) { + cols <- columns(what) + stopifnot(length(cols) > 0) + newEnv <- new.env() + for (i in 1:length(cols)) { + assign(x = cols[i], value = what[, cols[i]], envir = newEnv) + } + attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 8fad17026c..c106a00245 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1003,3 +1003,7 @@ setGeneric("rbind", signature = "...") #' @rdname as.data.frame #' @export setGeneric("as.data.frame") + +#' @rdname attach +#' @export +setGeneric("attach") \ No newline at end of file diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index b599994854..d5509e475d 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -1405,6 +1405,26 @@ test_that("Method as.data.frame as a synonym for collect()", { expect_equal(as.data.frame(irisDF2), collect(irisDF2)) }) +test_that("attach() on a DataFrame", { + df <- jsonFile(sqlContext, jsonPath) + expect_error(age) + attach(df) + expect_is(age, "DataFrame") + expected_age <- data.frame(age = c(NA, 30, 19)) + expect_equal(head(age), expected_age) + stat <- summary(age) + expect_equal(collect(stat)[5, "age"], "30") + age <- age$age + 1 + expect_is(age, "Column") + rm(age) + stat2 <- summary(age) + expect_equal(collect(stat2)[5, "age"], "30") + detach("df") + stat3 <- summary(df[, "age"]) + expect_equal(collect(stat3)[5, "age"], "30") + expect_error(age) +}) + unlink(parquetPath) unlink(jsonPath) unlink(jsonPathNa) -- cgit v1.2.3