diff options
author | hyukjinkwon <gurwls223@gmail.com> | 2017-03-27 10:43:00 -0700 |
---|---|---|
committer | Felix Cheung <felixcheung@apache.org> | 2017-03-27 10:43:00 -0700 |
commit | 3fada2f502107bd5572fb895471943de7b2c38e4 (patch) | |
tree | b3b3494e12a1af2baf8d7518fe8b9227bc400cdb | |
parent | 314cf51ded52834cfbaacf58d3d05a220965ca2a (diff) | |
download | spark-3fada2f502107bd5572fb895471943de7b2c38e4.tar.gz spark-3fada2f502107bd5572fb895471943de7b2c38e4.tar.bz2 spark-3fada2f502107bd5572fb895471943de7b2c38e4.zip |
[SPARK-20105][TESTS][R] Add tests for checkType and type string in structField in R
## What changes were proposed in this pull request?
It seems `checkType` and the type string in `structField` are not tested thoroughly. This string format currently seems SparkR-specific (see https://github.com/apache/spark/blob/d1f6c64c4b763c05d6d79ae5497f298dc3835f3e/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala#L93-L131) but resembles a SQL type definition.
Therefore, it seems nicer if we test positive/negative cases on the R side.
## How was this patch tested?
Unit tests in `test_sparkSQL.R`.
Author: hyukjinkwon <gurwls223@gmail.com>
Closes #17439 from HyukjinKwon/r-typestring-tests.
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 53 |
1 file changed, 53 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 394d1a04e0..5acf8719d1 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -140,6 +140,59 @@ test_that("structType and structField", { expect_equal(testSchema$fields()[[1]]$dataType.toString(), "StringType") }) +test_that("structField type strings", { + # positive cases + primitiveTypes <- list(byte = "ByteType", + integer = "IntegerType", + float = "FloatType", + double = "DoubleType", + string = "StringType", + binary = "BinaryType", + boolean = "BooleanType", + timestamp = "TimestampType", + date = "DateType") + + complexTypes <- list("map<string,integer>" = "MapType(StringType,IntegerType,true)", + "array<string>" = "ArrayType(StringType,true)", + "struct<a:string>" = "StructType(StructField(a,StringType,true))") + + typeList <- c(primitiveTypes, complexTypes) + typeStrings <- names(typeList) + + for (i in seq_along(typeStrings)){ + typeString <- typeStrings[i] + expected <- typeList[[i]] + testField <- structField("_col", typeString) + expect_is(testField, "structField") + expect_true(testField$nullable()) + expect_equal(testField$dataType.toString(), expected) + } + + # negative cases + primitiveErrors <- list(Byte = "Byte", + INTEGER = "INTEGER", + numeric = "numeric", + character = "character", + raw = "raw", + logical = "logical") + + complexErrors <- list("map<string, integer>" = " integer", + "array<String>" = "String", + "struct<a:string >" = "string ", + "map <string,integer>" = "map <string,integer>", + "array< string>" = " string", + "struct<a: string>" = " string") + + errorList <- c(primitiveErrors, complexErrors) + typeStrings <- names(errorList) + + for (i in seq_along(typeStrings)){ + typeString <- typeStrings[i] + expected <- paste0("Unsupported type for SparkDataframe: ", errorList[[i]]) + expect_error(structField("_col", typeString), expected) + } +}) + test_that("create DataFrame from 
RDD", { rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) df <- createDataFrame(rdd, list("a", "b")) |