diff options
Diffstat (limited to 'R/pkg')
-rw-r--r-- | R/pkg/NAMESPACE | 1 | ||||
-rw-r--r-- | R/pkg/R/DataFrame.R | 30 | ||||
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 13 |
3 files changed, 26 insertions, 18 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index c3ec3f4fb1..0cd9cb89d5 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -133,6 +133,7 @@ exportMethods("arrange", "summarize", "summary", "take", + "toJSON", "transform", "union", "unionAll", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 9a51d530f1..7737ffe4ed 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -737,26 +737,32 @@ setMethod("repartition", #' toJSON #' -#' Convert the rows of a SparkDataFrame into JSON objects and return an RDD where -#' each element contains a JSON string. +#' Converts a SparkDataFrame into a SparkDataFrame of JSON string. #' -#' @param x A SparkDataFrame -#' @return A StringRRDD of JSON objects +#' Each row is turned into a JSON document with columns as different fields. +#' The returned SparkDataFrame has a single character column with the name \code{value} +#' +#' @param x a SparkDataFrame +#' @return a SparkDataFrame +#' @family SparkDataFrame functions +#' @rdname toJSON +#' @name toJSON #' @aliases toJSON,SparkDataFrame-method -#' @noRd +#' @export #' @examples #'\dontrun{ #' sparkR.session() -#' path <- "path/to/file.json" -#' df <- read.json(path) -#' newRDD <- toJSON(df) +#' path <- "path/to/file.parquet" +#' df <- read.parquet(path) +#' df_json <- toJSON(df) #'} +#' @note toJSON since 2.2.0 setMethod("toJSON", signature(x = "SparkDataFrame"), function(x) { - rdd <- callJMethod(x@sdf, "toJSON") - jrdd <- callJMethod(rdd, "toJavaRDD") - RDD(jrdd, serializedMode = "string") + jsonDS <- callJMethod(x@sdf, "toJSON") + df <- callJMethod(jsonDS, "toDF") + dataFrame(df) }) #' Save the contents of SparkDataFrame as a JSON file @@ -936,7 +942,7 @@ setMethod("unique", #' Sample #' -#' Return a sampled subset of this SparkDataFrame using a random seed. +#' Return a sampled subset of this SparkDataFrame using a random seed. #' Note: this is not guaranteed to provide exactly the fraction specified #' of the total count of of the given SparkDataFrame. #' diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 4490f31cd8..c3f0310c75 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1689,12 +1689,13 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { unlink(jsonPath3) }) -test_that("toJSON() returns an RDD of the correct values", { - df <- read.json(jsonPath) - testRDD <- toJSON(df) - expect_is(testRDD, "RDD") - expect_equal(getSerializedMode(testRDD), "string") - expect_equal(collectRDD(testRDD)[[1]], mockLines[1]) +test_that("toJSON() on DataFrame", { + df <- as.DataFrame(cars) + df_json <- toJSON(df) + expect_is(df_json, "SparkDataFrame") + expect_equal(colnames(df_json), c("value")) + expect_equal(head(df_json, 1), + data.frame(value = "{\"speed\":4.0,\"dist\":2.0}", stringsAsFactors = FALSE)) }) test_that("showDF()", { |