diff options
author | CHOIJAEHONG <redrock07@naver.com> | 2015-09-03 13:38:26 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-09-03 13:38:26 -0700 |
commit | af0e3125cb1d48b1fc0e44c42b6880d67a9f1a85 (patch) | |
tree | b13448cb7fecabac7323c4d926a6cbd5a6085912 /R/pkg/inst/tests/test_sparkSQL.R | |
parent | 3abc0d512541158d11b181e2d9fa126d1371d5c0 (diff) | |
download | spark-af0e3125cb1d48b1fc0e44c42b6880d67a9f1a85.tar.gz spark-af0e3125cb1d48b1fc0e44c42b6880d67a9f1a85.tar.bz2 spark-af0e3125cb1d48b1fc0e44c42b6880d67a9f1a85.zip |
[SPARK-8951] [SPARKR] support Unicode characters in collect()
Spark gives an error message and does not show the output when a field of the result DataFrame contains characters in CJK.
I changed SerDe.scala in order that Spark support Unicode characters when writes a string to R.
Author: CHOIJAEHONG <redrock07@naver.com>
Closes #7494 from CHOIJAEHONG1/SPARK-8951.
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r-- | R/pkg/inst/tests/test_sparkSQL.R | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 0da5e38654..6d331f9883 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -431,6 +431,32 @@ test_that("collect() and take() on a DataFrame return the same number of rows an expect_equal(ncol(collect(df)), ncol(take(df, 10))) }) +test_that("collect() support Unicode characters", { + markUtf8 <- function(s) { + Encoding(s) <- "UTF-8" + s + } + + lines <- c("{\"name\":\"안녕하세요\"}", + "{\"name\":\"您好\", \"age\":30}", + "{\"name\":\"こんにちは\", \"age\":19}", + "{\"name\":\"Xin chào\"}") + + jsonPath <- tempfile(pattern="sparkr-test", fileext=".tmp") + writeLines(lines, jsonPath) + + df <- read.df(sqlContext, jsonPath, "json") + rdf <- collect(df) + expect_true(is.data.frame(rdf)) + expect_equal(rdf$name[1], markUtf8("안녕하세요")) + expect_equal(rdf$name[2], markUtf8("您好")) + expect_equal(rdf$name[3], markUtf8("こんにちは")) + expect_equal(rdf$name[4], markUtf8("Xin chào")) + + df1 <- createDataFrame(sqlContext, rdf) + expect_equal(collect(where(df1, df1$name == markUtf8("您好")))$name, markUtf8("您好")) +}) + test_that("multiple pipeline transformations result in an RDD with the correct values", { df <- jsonFile(sqlContext, jsonPath) first <- lapply(df, function(row) { |