aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorAdrian Zhuang <adrian555@users.noreply.github.com>2015-10-13 10:21:07 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-10-13 10:21:07 -0700
commitf7f28ee7a513c262d52cf433d25fbf06df9bd1f1 (patch)
treec117851aa454b56cbd28beef071c0d2c73221c62 /R
parent1e0aba90b9e73834af70d196f7f869b062d98d94 (diff)
downloadspark-f7f28ee7a513c262d52cf433d25fbf06df9bd1f1.tar.gz
spark-f7f28ee7a513c262d52cf433d25fbf06df9bd1f1.tar.bz2
spark-f7f28ee7a513c262d52cf433d25fbf06df9bd1f1.zip
[SPARK-10913] [SPARKR] attach() function support
Bring the change code up to date. Author: Adrian Zhuang <adrian555@users.noreply.github.com> Author: adrian555 <wzhuang@us.ibm.com> Closes #9031 from adrian555/attach2.
Diffstat (limited to 'R')
-rw-r--r--R/pkg/NAMESPACE1
-rw-r--r--R/pkg/R/DataFrame.R30
-rw-r--r--R/pkg/R/generics.R4
-rw-r--r--R/pkg/inst/tests/test_sparkSQL.R20
4 files changed, 55 insertions, 0 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 41986a5e7a..ed9cd94e03 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -23,6 +23,7 @@ export("setJobGroup",
exportClasses("DataFrame")
exportMethods("arrange",
+ "attach",
"cache",
"collect",
"columns",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 1b9137e6c7..e0ce056243 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1881,3 +1881,33 @@ setMethod("as.data.frame",
}
collect(x)
})
+
+#' The specified DataFrame is attached to the R search path. This means that
+#' the DataFrame is searched by R when evaluating a variable, so columns in
+#' the DataFrame can be accessed by simply giving their names. Each column is
+#' bound, under its own name, to the result of selecting that single column
+#' from the DataFrame.
+#'
+#' @rdname attach
+#' @title Attach DataFrame to R search path
+#' @param what (DataFrame) The DataFrame to attach. It must have at least one
+#'             column, otherwise an error is raised.
+#' @param pos (integer) Specify position in search() where to attach.
+#' @param name (character) Name to use for the attached DataFrame. Names
+#'             starting with package: are reserved for library. Defaults to
+#'             the deparsed expression supplied as \code{what}.
+#' @param warn.conflicts (logical) If TRUE, warnings are printed about conflicts
+#'             from attaching the database, unless the attached database
+#'             contains an object named \code{.conflicts.OK} (see the
+#'             documentation of \code{attach} in base R).
+#' @return The value of the underlying \code{attach} call made on the
+#'         environment that holds the columns.
+#' @examples
+#' \dontrun{
+#' attach(irisDf)
+#' summary(Sepal_Width)
+#' }
+#' @seealso \link{detach}
+setMethod("attach",
+          signature(what = "DataFrame"),
+          function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) {
+            cols <- columns(what)
+            # A DataFrame without columns has nothing to expose; fail fast.
+            stopifnot(length(cols) > 0)
+            newEnv <- new.env()
+            # Iterate over the names directly instead of 1:length(cols), which
+            # would yield c(1, 0) if the guard above were ever relaxed.
+            for (col in cols) {
+              assign(x = col, value = what[, col], envir = newEnv)
+            }
+            # The environment, not the DataFrame, is what lands on the search
+            # path; this call dispatches to the non-DataFrame attach().
+            attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
+          })
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8fad17026c..c106a00245 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1003,3 +1003,7 @@ setGeneric("rbind", signature = "...")
# Create the S4 generic for as.data.frame. Only a name is supplied, so
# setGeneric derives the generic from the existing function of that name.
#' @rdname as.data.frame
#' @export
setGeneric("as.data.frame")
+
+# Create the S4 generic for attach so that a DataFrame method can be
+# registered on it. Only a name is supplied, so setGeneric derives the
+# generic from the existing function of that name.
+#' @rdname attach
+#' @export
+setGeneric("attach") \ No newline at end of file
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index b599994854..d5509e475d 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1405,6 +1405,26 @@ test_that("Method as.data.frame as a synonym for collect()", {
expect_equal(as.data.frame(irisDF2), collect(irisDF2))
})
+# Checks the attach()/detach() life cycle for a DataFrame: column names become
+# resolvable after attach(), a local variable of the same name shadows the
+# attached column until it is removed, and detach() makes the name unbound again.
+test_that("attach() on a DataFrame", {
+ df <- jsonFile(sqlContext, jsonPath)
+ # Before attaching, `age` is not bound anywhere, so evaluating it must error.
+ expect_error(age)
+ # No `name` is passed, so the search-path entry is named after the
+ # expression `df`; detach("df") below relies on that.
+ attach(df)
+ # The attached column resolves to a DataFrame, not a Column.
+ expect_is(age, "DataFrame")
+ expected_age <- data.frame(age = c(NA, 30, 19))
+ expect_equal(head(age), expected_age)
+ stat <- summary(age)
+ # NOTE(review): row 5 of the summary is presumably the max statistic, which
+ # is returned as a string -- confirm against summary()'s row layout.
+ expect_equal(collect(stat)[5, "age"], "30")
+ # Assigning to `age` creates a local variable in the test's environment
+ # that shadows the attached column; the attached entry is left untouched.
+ age <- age$age + 1
+ expect_is(age, "Column")
+ # Removing the local variable makes the attached column visible again.
+ rm(age)
+ stat2 <- summary(age)
+ expect_equal(collect(stat2)[5, "age"], "30")
+ detach("df")
+ # The DataFrame itself is still usable after it has been detached ...
+ stat3 <- summary(df[, "age"])
+ expect_equal(collect(stat3)[5, "age"], "30")
+ # ... but the bare column name is unbound once more.
+ expect_error(age)
+})
+
# Delete the files referenced by the path variables used in this test file.
# NOTE(review): unlink() without recursive = TRUE does not remove directories;
# confirm that none of these paths (parquetPath in particular) is a directory.
unlink(parquetPath)
unlink(jsonPath)
unlink(jsonPathNa)