aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst/tests/test_sparkSQL.R
diff options
context:
space:
mode:
authorSun Rui <rui.sun@intel.com>2015-10-13 10:02:21 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-10-13 10:02:21 -0700
commit5e3868ba139f5f0b3a33361c6b884594a3ab6421 (patch)
treea37f40a4f112f452f0eba4dd8ac5b9e1eb805e4b /R/pkg/inst/tests/test_sparkSQL.R
parentd0cc79ccd0b4500bd6b18184a723dabc164e8abd (diff)
downloadspark-5e3868ba139f5f0b3a33361c6b884594a3ab6421.tar.gz
spark-5e3868ba139f5f0b3a33361c6b884594a3ab6421.tar.bz2
spark-5e3868ba139f5f0b3a33361c6b884594a3ab6421.zip
[SPARK-10051] [SPARKR] Support collecting data of StructType in DataFrame
Two points in this PR: 1. The original thought was that a named R list is assumed to be a struct in SerDe. But this is problematic because some R functions will implicitly generate named lists that are not intended to be a struct when transferred by SerDe. So SerDe clients have to explicitly mark a named list as a struct by changing its class from "list" to "struct". 2. SerDe is in the Spark Core module, and data of StructType is represented as GenericRow, which is defined in the Spark SQL module. SerDe can't import GenericRow because, in the maven build, the Spark SQL module depends on the Spark Core module. So this PR adds a registration hook in SerDe to allow SQLUtils in the Spark SQL module to register its functions for serialization and deserialization of StructType. Author: Sun Rui <rui.sun@intel.com> Closes #8794 from sun-rui/SPARK-10051.
Diffstat (limited to 'R/pkg/inst/tests/test_sparkSQL.R')
-rw-r--r--R/pkg/inst/tests/test_sparkSQL.R51
1 files changed, 29 insertions, 22 deletions
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 3a04edbb4c..af6efa40fb 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -66,10 +66,7 @@ test_that("infer types and check types", {
expect_equal(infer_type(as.POSIXlt("2015-03-11 12:13:04.043")), "timestamp")
expect_equal(infer_type(c(1L, 2L)), "array<integer>")
expect_equal(infer_type(list(1L, 2L)), "array<integer>")
- testStruct <- infer_type(list(a = 1L, b = "2"))
- expect_equal(class(testStruct), "structType")
- checkStructField(testStruct$fields()[[1]], "a", "IntegerType", TRUE)
- checkStructField(testStruct$fields()[[2]], "b", "StringType", TRUE)
+ expect_equal(infer_type(listToStruct(list(a = 1L, b = "2"))), "struct<a:integer,b:string>")
e <- new.env()
assign("a", 1L, envir = e)
expect_equal(infer_type(e), "map<string,integer>")
@@ -242,38 +239,36 @@ test_that("create DataFrame with different data types", {
expect_equal(collect(df), data.frame(l, stringsAsFactors = FALSE))
})
-test_that("create DataFrame with nested array and map", {
-# e <- new.env()
-# assign("n", 3L, envir = e)
-# l <- list(1:10, list("a", "b"), e, list(a="aa", b=3L))
-# df <- createDataFrame(sqlContext, list(l), c("a", "b", "c", "d"))
-# expect_equal(dtypes(df), list(c("a", "array<int>"), c("b", "array<string>"),
-# c("c", "map<string,int>"), c("d", "struct<a:string,b:int>")))
-# expect_equal(count(df), 1)
-# ldf <- collect(df)
-# expect_equal(ldf[1,], l[[1]])
-
- # ArrayType and MapType
+test_that("create DataFrame with complex types", {
e <- new.env()
assign("n", 3L, envir = e)
- l <- list(as.list(1:10), list("a", "b"), e)
- df <- createDataFrame(sqlContext, list(l), c("a", "b", "c"))
+ s <- listToStruct(list(a = "aa", b = 3L))
+
+ l <- list(as.list(1:10), list("a", "b"), e, s)
+ df <- createDataFrame(sqlContext, list(l), c("a", "b", "c", "d"))
expect_equal(dtypes(df), list(c("a", "array<int>"),
c("b", "array<string>"),
- c("c", "map<string,int>")))
+ c("c", "map<string,int>"),
+ c("d", "struct<a:string,b:int>")))
expect_equal(count(df), 1)
ldf <- collect(df)
- expect_equal(names(ldf), c("a", "b", "c"))
+ expect_equal(names(ldf), c("a", "b", "c", "d"))
expect_equal(ldf[1, 1][[1]], l[[1]])
expect_equal(ldf[1, 2][[1]], l[[2]])
+
e <- ldf$c[[1]]
expect_equal(class(e), "environment")
expect_equal(ls(e), "n")
expect_equal(e$n, 3L)
+
+ s <- ldf$d[[1]]
+ expect_equal(class(s), "struct")
+ expect_equal(s$a, "aa")
+ expect_equal(s$b, 3L)
})
-# For test map type in DataFrame
+# For test map type and struct type in DataFrame
mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}",
"{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}",
"{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}")
@@ -308,7 +303,19 @@ test_that("Collect DataFrame with complex types", {
expect_equal(bob$age, 16)
expect_equal(bob$height, 176.5)
- # TODO: tests for StructType after it is supported
+ # StructType
+ df <- jsonFile(sqlContext, mapTypeJsonPath)
+ expect_equal(dtypes(df), list(c("info", "struct<age:bigint,height:double>"),
+ c("name", "string")))
+ ldf <- collect(df)
+ expect_equal(nrow(ldf), 3)
+ expect_equal(ncol(ldf), 2)
+ expect_equal(names(ldf), c("info", "name"))
+ expect_equal(ldf$name, c("Bob", "Alice", "David"))
+ bob <- ldf$info[[1]]
+ expect_equal(class(bob), "struct")
+ expect_equal(bob$age, 16)
+ expect_equal(bob$height, 176.5)
})
test_that("jsonFile() on a local file returns a DataFrame", {