aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Oscar D. Lara Yejas <olarayej@mail.usf.edu> 2015-09-30 18:03:31 -0700
committer Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-09-30 18:03:31 -0700
commit f21e2da03fbf8041fece476e3d5c699aef819451 (patch)
tree 5e9d949731b60b599b444bea906167bba2e23ed3
parent 89ea0041ae5a701ce8d211ed08f1f059b7f9c396 (diff)
download spark-f21e2da03fbf8041fece476e3d5c699aef819451.tar.gz
spark-f21e2da03fbf8041fece476e3d5c699aef819451.tar.bz2
spark-f21e2da03fbf8041fece476e3d5c699aef819451.zip
[SPARK-10807] [SPARKR] Added as.data.frame as a synonym for collect
Created method as.data.frame as a synonym for collect(). Author: Oscar D. Lara Yejas <olarayej@mail.usf.edu> Author: olarayej <oscar.lara.yejas@us.ibm.com> Author: Oscar D. Lara Yejas <oscar.lara.yejas@us.ibm.com> Closes #8908 from olarayej/SPARK-10807.
-rw-r--r-- R/pkg/NAMESPACE 2
-rw-r--r-- R/pkg/R/DataFrame.R 25
-rw-r--r-- R/pkg/R/generics.R 4
-rw-r--r-- R/pkg/inst/tests/test_sparkSQL.R 9
4 files changed, 39 insertions, 1 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 9d39630706..c28c47daea 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -247,3 +247,5 @@ export("structField",
"structType.jobj",
"structType.structField",
"print.structType")
+
+export("as.data.frame")
\ No newline at end of file
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index c3c1893487..65e368c47d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1848,3 +1848,28 @@ setMethod("crosstab",
sct <- callJMethod(statFunctions, "crosstab", col1, col2)
collect(dataFrame(sct))
})
+
+
+#' Download the contents of a DataFrame into an R data.frame.
+#' Since data.frames are held in memory, ensure that you have enough memory
+#' in your system to accommodate the contents.
+#'
+#' @title Download data from a DataFrame into a data.frame
+#' @param x a DataFrame
+#' @param ... must be empty; passing any additional argument raises an error
+#' @return a data.frame
+#' @rdname as.data.frame
+#' @examples \dontrun{
+#' irisDF <- createDataFrame(sqlContext, iris)
+#' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ])
+#' }
+setMethod("as.data.frame",
+          signature(x = "DataFrame"),
+          function(x, ...) {
+            # Extra arguments are rejected rather than silently ignored.
+            if (length(list(...)) > 0) {
+              stop(paste0("Unused argument(s): ", paste(list(...), collapse = ", ")))
+            }
+            collect(x)
+          }
+)
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 43dd8d283a..3db41e0fe2 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -983,3 +983,7 @@ setGeneric("glm")
#' @rdname rbind
#' @export
setGeneric("rbind", signature = "...")
+
+#' @rdname as.data.frame
+#' @export
+setGeneric("as.data.frame")
\ No newline at end of file
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index e159a69584..8f85eecbc4 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -1327,6 +1327,13 @@ test_that("SQL error message is returned from JVM", {
expect_equal(grepl("Table Not Found: blah", retError), TRUE)
})
+test_that("Method as.data.frame as a synonym for collect()", {
+  irisDF <- createDataFrame(sqlContext, iris)
+  for (df in list(irisDF, irisDF[irisDF$Species == "setosa", ])) {  # full table, then a subset
+    expect_equal(as.data.frame(df), collect(df))
+  }
+})
+
unlink(parquetPath)
unlink(jsonPath)
-unlink(jsonPathNa)
+unlink(jsonPathNa)
\ No newline at end of file