aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorFelix Cheung <felixcheung_m@hotmail.com>2016-12-22 20:54:38 -0800
committerFelix Cheung <felixcheung@apache.org>2016-12-22 20:54:38 -0800
commit17579bda3c114022a0b3889aa4c9188307af75e9 (patch)
treeee46ade7bd66ab0b97c08073b4094c99806ce2c0 /R
parentf252cb5d161e064d39cc1ed1d9299307a0636174 (diff)
downloadspark-17579bda3c114022a0b3889aa4c9188307af75e9.tar.gz
spark-17579bda3c114022a0b3889aa4c9188307af75e9.tar.bz2
spark-17579bda3c114022a0b3889aa4c9188307af75e9.zip
[SPARK-18958][SPARKR] R API toJSON on DataFrame
## What changes were proposed in this pull request? It would make it easier to integrate with other components expecting row-based JSON format. This replaces the non-public toJSON RDD API. ## How was this patch tested? manual, unit tests Author: Felix Cheung <felixcheung_m@hotmail.com> Closes #16368 from felixcheung/rJSON.
Diffstat (limited to 'R')
-rw-r--r--R/pkg/NAMESPACE1
-rw-r--r--R/pkg/R/DataFrame.R30
-rw-r--r--R/pkg/inst/tests/testthat/test_sparkSQL.R13
3 files changed, 26 insertions, 18 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index c3ec3f4fb1..0cd9cb89d5 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -133,6 +133,7 @@ exportMethods("arrange",
"summarize",
"summary",
"take",
+ "toJSON",
"transform",
"union",
"unionAll",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 9a51d530f1..7737ffe4ed 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -737,26 +737,32 @@ setMethod("repartition",
#' toJSON
#'
-#' Convert the rows of a SparkDataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
+#' Converts a SparkDataFrame into a SparkDataFrame of JSON strings.
#'
-#' @param x A SparkDataFrame
-#' @return A StringRRDD of JSON objects
+#' Each row is turned into a JSON document with columns as different fields.
+#' The returned SparkDataFrame has a single character column with the name \code{value}
+#'
+#' @param x a SparkDataFrame
+#' @return a SparkDataFrame
+#' @family SparkDataFrame functions
+#' @rdname toJSON
+#' @name toJSON
#' @aliases toJSON,SparkDataFrame-method
-#' @noRd
+#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
-#' path <- "path/to/file.json"
-#' df <- read.json(path)
-#' newRDD <- toJSON(df)
+#' path <- "path/to/file.parquet"
+#' df <- read.parquet(path)
+#' df_json <- toJSON(df)
#'}
+#' @note toJSON since 2.2.0
setMethod("toJSON",
signature(x = "SparkDataFrame"),
function(x) {
- rdd <- callJMethod(x@sdf, "toJSON")
- jrdd <- callJMethod(rdd, "toJavaRDD")
- RDD(jrdd, serializedMode = "string")
+ jsonDS <- callJMethod(x@sdf, "toJSON")
+ df <- callJMethod(jsonDS, "toDF")
+ dataFrame(df)
})
#' Save the contents of SparkDataFrame as a JSON file
@@ -936,7 +942,7 @@ setMethod("unique",
#' Sample
#'
-#' Return a sampled subset of this SparkDataFrame using a random seed.
+#' Return a sampled subset of this SparkDataFrame using a random seed.
#' Note: this is not guaranteed to provide exactly the fraction specified
#' of the total count of the given SparkDataFrame.
#'
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 4490f31cd8..c3f0310c75 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1689,12 +1689,13 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
unlink(jsonPath3)
})
-test_that("toJSON() returns an RDD of the correct values", {
- df <- read.json(jsonPath)
- testRDD <- toJSON(df)
- expect_is(testRDD, "RDD")
- expect_equal(getSerializedMode(testRDD), "string")
- expect_equal(collectRDD(testRDD)[[1]], mockLines[1])
+test_that("toJSON() on DataFrame", {
+ df <- as.DataFrame(cars)
+ df_json <- toJSON(df)
+ expect_is(df_json, "SparkDataFrame")
+ expect_equal(colnames(df_json), c("value"))
+ expect_equal(head(df_json, 1),
+ data.frame(value = "{\"speed\":4.0,\"dist\":2.0}", stringsAsFactors = FALSE))
})
test_that("showDF()", {