diff options
author | Sun Rui <rui.sun@intel.com> | 2015-10-13 10:02:21 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-10-13 10:02:21 -0700 |
commit | 5e3868ba139f5f0b3a33361c6b884594a3ab6421 (patch) | |
tree | a37f40a4f112f452f0eba4dd8ac5b9e1eb805e4b /R/pkg/inst/tests/test_sparkSQL.R | |
parent | d0cc79ccd0b4500bd6b18184a723dabc164e8abd (diff) | |
download | spark-5e3868ba139f5f0b3a33361c6b884594a3ab6421.tar.gz spark-5e3868ba139f5f0b3a33361c6b884594a3ab6421.tar.bz2 spark-5e3868ba139f5f0b3a33361c6b884594a3ab6421.zip |
[SPARK-10051] [SPARKR] Support collecting data of StructType in DataFrame
Two points in this PR:
1. The original assumption was that a named R list is treated as a struct in SerDe. But this is problematic because some R functions will implicitly generate named lists that are not intended to be a struct when transferred by SerDe. So SerDe clients have to explicitly mark a named list as a struct by changing its class from "list" to "struct".
2. SerDe is in the Spark Core module, and data of StructType is represented as GenericRow, which is defined in the Spark SQL module. SerDe can't import GenericRow because, in the Maven build, the Spark SQL module depends on the Spark Core module. So this PR adds a registration hook in SerDe to allow SQLUtils in the Spark SQL module to register its functions for serialization and deserialization of StructType.
Author: Sun Rui <rui.sun@intel.com>
Closes #8794 from sun-rui/SPARK-10051.
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r-- | R/pkg/inst/tests/test_sparkSQL.R | 51 |
1 file changed, 29 insertions, 22 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 3a04edbb4c..af6efa40fb 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -66,10 +66,7 @@ test_that("infer types and check types", { expect_equal(infer_type(as.POSIXlt("2015-03-11 12:13:04.043")), "timestamp") expect_equal(infer_type(c(1L, 2L)), "array<integer>") expect_equal(infer_type(list(1L, 2L)), "array<integer>") - testStruct <- infer_type(list(a = 1L, b = "2")) - expect_equal(class(testStruct), "structType") - checkStructField(testStruct$fields()[[1]], "a", "IntegerType", TRUE) - checkStructField(testStruct$fields()[[2]], "b", "StringType", TRUE) + expect_equal(infer_type(listToStruct(list(a = 1L, b = "2"))), "struct<a:integer,b:string>") e <- new.env() assign("a", 1L, envir = e) expect_equal(infer_type(e), "map<string,integer>") @@ -242,38 +239,36 @@ test_that("create DataFrame with different data types", { expect_equal(collect(df), data.frame(l, stringsAsFactors = FALSE)) }) -test_that("create DataFrame with nested array and map", { -# e <- new.env() -# assign("n", 3L, envir = e) -# l <- list(1:10, list("a", "b"), e, list(a="aa", b=3L)) -# df <- createDataFrame(sqlContext, list(l), c("a", "b", "c", "d")) -# expect_equal(dtypes(df), list(c("a", "array<int>"), c("b", "array<string>"), -# c("c", "map<string,int>"), c("d", "struct<a:string,b:int>"))) -# expect_equal(count(df), 1) -# ldf <- collect(df) -# expect_equal(ldf[1,], l[[1]]) - - # ArrayType and MapType +test_that("create DataFrame with complex types", { e <- new.env() assign("n", 3L, envir = e) - l <- list(as.list(1:10), list("a", "b"), e) - df <- createDataFrame(sqlContext, list(l), c("a", "b", "c")) + s <- listToStruct(list(a = "aa", b = 3L)) + + l <- list(as.list(1:10), list("a", "b"), e, s) + df <- createDataFrame(sqlContext, list(l), c("a", "b", "c", "d")) expect_equal(dtypes(df), list(c("a", "array<int>"), c("b", "array<string>"), - c("c", "map<string,int>"))) + c("c", 
"map<string,int>"), + c("d", "struct<a:string,b:int>"))) expect_equal(count(df), 1) ldf <- collect(df) - expect_equal(names(ldf), c("a", "b", "c")) + expect_equal(names(ldf), c("a", "b", "c", "d")) expect_equal(ldf[1, 1][[1]], l[[1]]) expect_equal(ldf[1, 2][[1]], l[[2]]) + e <- ldf$c[[1]] expect_equal(class(e), "environment") expect_equal(ls(e), "n") expect_equal(e$n, 3L) + + s <- ldf$d[[1]] + expect_equal(class(s), "struct") + expect_equal(s$a, "aa") + expect_equal(s$b, 3L) }) -# For test map type in DataFrame +# For test map type and struct type in DataFrame mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}", "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}", "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}") @@ -308,7 +303,19 @@ test_that("Collect DataFrame with complex types", { expect_equal(bob$age, 16) expect_equal(bob$height, 176.5) - # TODO: tests for StructType after it is supported + # StructType + df <- jsonFile(sqlContext, mapTypeJsonPath) + expect_equal(dtypes(df), list(c("info", "struct<age:bigint,height:double>"), + c("name", "string"))) + ldf <- collect(df) + expect_equal(nrow(ldf), 3) + expect_equal(ncol(ldf), 2) + expect_equal(names(ldf), c("info", "name")) + expect_equal(ldf$name, c("Bob", "Alice", "David")) + bob <- ldf$info[[1]] + expect_equal(class(bob), "struct") + expect_equal(bob$age, 16) + expect_equal(bob$height, 176.5) }) test_that("jsonFile() on a local file returns a DataFrame", { |