path: root/R/pkg/inst/tests/testthat/test_sparkSQL.R
author    Felix Cheung <felixcheung_m@hotmail.com>    2017-03-05 12:37:02 -0800
committer Felix Cheung <felixcheung@apache.org>       2017-03-05 12:37:02 -0800
commit    80d5338b32e856870cf187ce17bc87335d690761 (patch)
tree      830e47367c9ada5ec645988b2540209869e342eb /R/pkg/inst/tests/testthat/test_sparkSQL.R
parent    14bb398fae974137c3e38162cefc088e12838258 (diff)
download  spark-80d5338b32e856870cf187ce17bc87335d690761.tar.gz
          spark-80d5338b32e856870cf187ce17bc87335d690761.tar.bz2
          spark-80d5338b32e856870cf187ce17bc87335d690761.zip
[SPARK-19795][SPARKR] add column functions to_json, from_json
## What changes were proposed in this pull request?

Add column functions: to_json, from_json, and tests covering error cases.

## How was this patch tested?

unit tests, manual

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #17134 from felixcheung/rtojson.
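A minimal usage sketch of the two new column functions, assuming a running SparkR session; the input path, column names, and schema below are illustrative, not taken from this patch:

```r
library(SparkR)
sparkR.session()

# e.g. a JSON file with lines like {"name":"Bob","info":{"age":16,"height":176.5}}
df <- read.json("/tmp/people.json")

# to_json: serialize a struct column into a JSON string column
json_df <- select(df, alias(to_json(df$info), "json"))

# from_json: parse a JSON string column back into a struct, given an explicit schema
schema <- structType(structField("age", "integer"),
                     structField("height", "double"))
parsed <- select(json_df, alias(from_json(json_df$json, schema), "info"))

head(collect(parsed))

# Reader options can be passed as named arguments, e.g.:
# from_json(df$col, structType(structField("date", "date")), dateFormat = "dd/MM/yyyy")
```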
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_sparkSQL.R')
-rw-r--r--  R/pkg/inst/tests/testthat/test_sparkSQL.R | 43
1 file changed, 36 insertions(+), 7 deletions(-)
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 1dd8c5ce6c..7c096597fe 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -88,6 +88,13 @@ mockLinesComplexType <-
complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
writeLines(mockLinesComplexType, complexTypeJsonPath)
+# For test map type and struct type in DataFrame
+mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}",
+ "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}",
+ "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}")
+mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
+writeLines(mockLinesMapType, mapTypeJsonPath)
+
test_that("calling sparkRSQL.init returns existing SQL context", {
sqlContext <- suppressWarnings(sparkRSQL.init(sc))
expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext)
@@ -466,13 +473,6 @@ test_that("create DataFrame from a data.frame with complex types", {
expect_equal(ldf$an_envir, collected$an_envir)
})
-# For test map type and struct type in DataFrame
-mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}",
- "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}",
- "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}")
-mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-writeLines(mockLinesMapType, mapTypeJsonPath)
-
test_that("Collect DataFrame with complex types", {
# ArrayType
df <- read.json(complexTypeJsonPath)
@@ -1337,6 +1337,33 @@ test_that("column functions", {
df <- createDataFrame(data.frame(x = c(2.5, 3.5)))
expect_equal(collect(select(df, bround(df$x, 0)))[[1]][1], 2)
expect_equal(collect(select(df, bround(df$x, 0)))[[1]][2], 4)
+
+ # Test to_json(), from_json()
+ df <- read.json(mapTypeJsonPath)
+ j <- collect(select(df, alias(to_json(df$info), "json")))
+ expect_equal(j[order(j$json), ][1], "{\"age\":16,\"height\":176.5}")
+ df <- as.DataFrame(j)
+ schema <- structType(structField("age", "integer"),
+ structField("height", "double"))
+ s <- collect(select(df, alias(from_json(df$json, schema), "structcol")))
+ expect_equal(ncol(s), 1)
+ expect_equal(nrow(s), 3)
+ expect_is(s[[1]][[1]], "struct")
+ expect_true(any(apply(s, 1, function(x) { x[[1]]$age == 16 } )))
+
+ # passing option
+ df <- as.DataFrame(list(list("col" = "{\"date\":\"21/10/2014\"}")))
+ schema2 <- structType(structField("date", "date"))
+ expect_error(tryCatch(collect(select(df, from_json(df$col, schema2))),
+ error = function(e) { stop(e) }),
+ paste0(".*(java.lang.NumberFormatException: For input string:).*"))
+ s <- collect(select(df, from_json(df$col, schema2, dateFormat = "dd/MM/yyyy")))
+ expect_is(s[[1]][[1]]$date, "Date")
+ expect_equal(as.character(s[[1]][[1]]$date), "2014-10-21")
+
+ # check for unparseable
+ df <- as.DataFrame(list(list("a" = "")))
+ expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
})
test_that("column binary mathfunctions", {
@@ -2867,5 +2894,7 @@ unlink(parquetPath)
unlink(orcPath)
unlink(jsonPath)
unlink(jsonPathNa)
+unlink(complexTypeJsonPath)
+unlink(mapTypeJsonPath)
sparkR.session.stop()