diff options
author | Felix Cheung <felixcheung_m@hotmail.com> | 2016-12-22 20:54:38 -0800 |
---|---|---|
committer | Felix Cheung <felixcheung@apache.org> | 2016-12-22 20:54:38 -0800 |
commit | 17579bda3c114022a0b3889aa4c9188307af75e9 (patch) | |
tree | ee46ade7bd66ab0b97c08073b4094c99806ce2c0 /R | |
parent | f252cb5d161e064d39cc1ed1d9299307a0636174 (diff) | |
download | spark-17579bda3c114022a0b3889aa4c9188307af75e9.tar.gz spark-17579bda3c114022a0b3889aa4c9188307af75e9.tar.bz2 spark-17579bda3c114022a0b3889aa4c9188307af75e9.zip |
[SPARK-18958][SPARKR] R API toJSON on DataFrame
## What changes were proposed in this pull request?
It would make it easier to integrate with other components expecting a row-based JSON format.
This replaces the non-public toJSON RDD API.
## How was this patch tested?
manual, unit tests
Author: Felix Cheung <felixcheung_m@hotmail.com>
Closes #16368 from felixcheung/rJSON.
Diffstat (limited to 'R')
-rw-r--r-- | R/pkg/NAMESPACE | 1 | ||||
-rw-r--r-- | R/pkg/R/DataFrame.R | 30 | ||||
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 13 |
3 files changed, 26 insertions, 18 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index c3ec3f4fb1..0cd9cb89d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -133,6 +133,7 @@ exportMethods("arrange", "summarize", "summary", "take", + "toJSON", "transform", "union", "unionAll", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 9a51d530f1..7737ffe4ed 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -737,26 +737,32 @@ setMethod("repartition", #' toJSON #' -#' Convert the rows of a SparkDataFrame into JSON objects and return an RDD where -#' each element contains a JSON string. +#' Converts a SparkDataFrame into a SparkDataFrame of JSON string. #' -#' @param x A SparkDataFrame -#' @return A StringRRDD of JSON objects +#' Each row is turned into a JSON document with columns as different fields. +#' The returned SparkDataFrame has a single character column with the name \code{value} +#' +#' @param x a SparkDataFrame +#' @return a SparkDataFrame +#' @family SparkDataFrame functions +#' @rdname toJSON +#' @name toJSON #' @aliases toJSON,SparkDataFrame-method -#' @noRd +#' @export #' @examples #'\dontrun{ #' sparkR.session() -#' path <- "path/to/file.json" -#' df <- read.json(path) -#' newRDD <- toJSON(df) +#' path <- "path/to/file.parquet" +#' df <- read.parquet(path) +#' df_json <- toJSON(df) #'} +#' @note toJSON since 2.2.0 setMethod("toJSON", signature(x = "SparkDataFrame"), function(x) { - rdd <- callJMethod(x@sdf, "toJSON") - jrdd <- callJMethod(rdd, "toJavaRDD") - RDD(jrdd, serializedMode = "string") + jsonDS <- callJMethod(x@sdf, "toJSON") + df <- callJMethod(jsonDS, "toDF") + dataFrame(df) }) #' Save the contents of SparkDataFrame as a JSON file @@ -936,7 +942,7 @@ setMethod("unique", #' Sample #' -#' Return a sampled subset of this SparkDataFrame using a random seed. +#' Return a sampled subset of this SparkDataFrame using a random seed. #' Note: this is not guaranteed to provide exactly the fraction specified #' of the total count of of the given SparkDataFrame. 
#' diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 4490f31cd8..c3f0310c75 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1689,12 +1689,13 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { unlink(jsonPath3) }) -test_that("toJSON() returns an RDD of the correct values", { - df <- read.json(jsonPath) - testRDD <- toJSON(df) - expect_is(testRDD, "RDD") - expect_equal(getSerializedMode(testRDD), "string") - expect_equal(collectRDD(testRDD)[[1]], mockLines[1]) +test_that("toJSON() on DataFrame", { + df <- as.DataFrame(cars) + df_json <- toJSON(df) + expect_is(df_json, "SparkDataFrame") + expect_equal(colnames(df_json), c("value")) + expect_equal(head(df_json, 1), + data.frame(value = "{\"speed\":4.0,\"dist\":2.0}", stringsAsFactors = FALSE)) }) test_that("showDF()", { |