diff options
author | Felix Cheung <felixcheung_m@hotmail.com> | 2016-12-22 20:54:38 -0800 |
---|---|---|
committer | Felix Cheung <felixcheung@apache.org> | 2016-12-22 20:54:38 -0800 |
commit | 17579bda3c114022a0b3889aa4c9188307af75e9 (patch) | |
tree | ee46ade7bd66ab0b97c08073b4094c99806ce2c0 /R | |
parent | f252cb5d161e064d39cc1ed1d9299307a0636174 (diff) | |
download | spark-17579bda3c114022a0b3889aa4c9188307af75e9.tar.gz spark-17579bda3c114022a0b3889aa4c9188307af75e9.tar.bz2 spark-17579bda3c114022a0b3889aa4c9188307af75e9.zip |
[SPARK-18958][SPARKR] R API toJSON on DataFrame
## What changes were proposed in this pull request?
It would make it easier to integrate with other components expecting a row-based JSON format.
This replaces the non-public toJSON RDD API.
## How was this patch tested?
manual, unit tests
Author: Felix Cheung <felixcheung_m@hotmail.com>
Closes #16368 from felixcheung/rJSON.
Diffstat (limited to 'R')
-rw-r--r-- | R/pkg/NAMESPACE | 1 | ||||
-rw-r--r-- | R/pkg/R/DataFrame.R | 30 | ||||
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 13 |
3 files changed, 26 insertions, 18 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index c3ec3f4fb1..0cd9cb89d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -133,6 +133,7 @@ exportMethods("arrange", "summarize", "summary", "take", + "toJSON", "transform", "union", "unionAll", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 9a51d530f1..7737ffe4ed 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -737,26 +737,32 @@ setMethod("repartition", #' toJSON #' -#' Convert the rows of a SparkDataFrame into JSON objects and return an RDD where -#' each element contains a JSON string. +#' Converts a SparkDataFrame into a SparkDataFrame of JSON string. #' -#' @param x A SparkDataFrame -#' @return A StringRRDD of JSON objects +#' Each row is turned into a JSON document with columns as different fields. +#' The returned SparkDataFrame has a single character column with the name \code{value} +#' +#' @param x a SparkDataFrame +#' @return a SparkDataFrame +#' @family SparkDataFrame functions +#' @rdname toJSON +#' @name toJSON #' @aliases toJSON,SparkDataFrame-method -#' @noRd +#' @export #' @examples #'\dontrun{ #' sparkR.session() -#' path <- "path/to/file.json" -#' df <- read.json(path) -#' newRDD <- toJSON(df) +#' path <- "path/to/file.parquet" +#' df <- read.parquet(path) +#' df_json <- toJSON(df) #'} +#' @note toJSON since 2.2.0 setMethod("toJSON", signature(x = "SparkDataFrame"), function(x) { - rdd <- callJMethod(x@sdf, "toJSON") - jrdd <- callJMethod(rdd, "toJavaRDD") - RDD(jrdd, serializedMode = "string") + jsonDS <- callJMethod(x@sdf, "toJSON") + df <- callJMethod(jsonDS, "toDF") + dataFrame(df) }) #' Save the contents of SparkDataFrame as a JSON file @@ -936,7 +942,7 @@ setMethod("unique", #' Sample #' -#' Return a sampled subset of this SparkDataFrame using a random seed. +#' Return a sampled subset of this SparkDataFrame using a random seed. #' Note: this is not guaranteed to provide exactly the fraction specified #' of the total count of of the given SparkDataFrame. 
#' diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 4490f31cd8..c3f0310c75 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1689,12 +1689,13 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { unlink(jsonPath3) }) -test_that("toJSON() returns an RDD of the correct values", { - df <- read.json(jsonPath) - testRDD <- toJSON(df) - expect_is(testRDD, "RDD") - expect_equal(getSerializedMode(testRDD), "string") - expect_equal(collectRDD(testRDD)[[1]], mockLines[1]) +test_that("toJSON() on DataFrame", { + df <- as.DataFrame(cars) + df_json <- toJSON(df) + expect_is(df_json, "SparkDataFrame") + expect_equal(colnames(df_json), c("value")) + expect_equal(head(df_json, 1), + data.frame(value = "{\"speed\":4.0,\"dist\":2.0}", stringsAsFactors = FALSE)) }) test_that("showDF()", { |