From c7e68c3968357268f705dee1477c448472c21547 Mon Sep 17 00:00:00 2001 From: Sun Rui Date: Sun, 13 Mar 2016 14:30:44 -0700 Subject: [SPARK-13812][SPARKR] Fix SparkR lint-r test errors. ## What changes were proposed in this pull request? This PR fixes all newly captured SparkR lint-r errors after the lintr package is updated from github. ## How was this patch tested? dev/lint-r SparkR unit tests Author: Sun Rui Closes #11652 from sun-rui/SPARK-13812. --- R/pkg/DESCRIPTION | 5 +- R/pkg/R/DataFrame.R | 16 +-- R/pkg/R/RDD.R | 2 +- R/pkg/R/context.R | 3 + R/pkg/R/deserialize.R | 2 +- R/pkg/R/generics.R | 4 +- R/pkg/R/mllib.R | 2 +- R/pkg/R/serialize.R | 2 +- R/pkg/R/sparkR.R | 6 +- R/pkg/R/utils.R | 4 +- R/pkg/inst/profile/general.R | 2 +- R/pkg/inst/tests/testthat/packageInAJarTest.R | 4 +- R/pkg/inst/tests/testthat/test_binaryFile.R | 14 +- R/pkg/inst/tests/testthat/test_binary_function.R | 6 +- R/pkg/inst/tests/testthat/test_broadcast.R | 4 +- R/pkg/inst/tests/testthat/test_mllib.R | 4 +- R/pkg/inst/tests/testthat/test_rdd.R | 82 ++++++------ R/pkg/inst/tests/testthat/test_sparkSQL.R | 156 +++++++++++------------ R/pkg/inst/tests/testthat/test_textFile.R | 24 ++-- R/pkg/inst/tests/testthat/test_utils.R | 8 +- R/pkg/inst/worker/worker.R | 2 +- 21 files changed, 178 insertions(+), 174 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 465bc37788..0cd0d75df0 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -18,10 +18,10 @@ Collate: 'schema.R' 'generics.R' 'jobj.R' - 'RDD.R' - 'pairRDD.R' 'column.R' 'group.R' + 'RDD.R' + 'pairRDD.R' 'DataFrame.R' 'SQLContext.R' 'backend.R' @@ -36,3 +36,4 @@ Collate: 'stats.R' 'types.R' 'utils.R' +RoxygenNote: 5.0.1 diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 50655e9382..a64a013b65 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -321,7 +321,7 @@ setMethod("colnames<-", } # Check if the column names have . in it - if (any(regexec(".", value, fixed=TRUE)[[1]][1] != -1)) { + if (any(regexec(".", value, fixed = TRUE)[[1]][1] != -1)) { stop("Colum names cannot contain the '.' symbol.") } @@ -351,7 +351,7 @@ setMethod("coltypes", types <- sapply(dtypes(x), function(x) {x[[2]]}) # Map Spark data types into R's data types using DATA_TYPES environment - rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) { + rTypes <- sapply(types, USE.NAMES = F, FUN = function(x) { # Check for primitive types type <- PRIMITIVE_TYPES[[x]] @@ -1779,7 +1779,7 @@ setMethod("merge", signature(x = "DataFrame", y = "DataFrame"), function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all, - sort = TRUE, suffixes = c("_x","_y"), ... ) { + sort = TRUE, suffixes = c("_x", "_y"), ... ) { if (length(suffixes) != 2) { stop("suffixes must have length 2") @@ -2299,7 +2299,7 @@ setMethod("as.data.frame", function(x, ...) 
{ # Check if additional parameters have been passed if (length(list(...)) > 0) { - stop(paste("Unused argument(s): ", paste(list(...), collapse=", "))) + stop(paste("Unused argument(s): ", paste(list(...), collapse = ", "))) } collect(x) }) @@ -2395,13 +2395,13 @@ setMethod("str", # Get the first elements for each column firstElements <- if (types[i] == "character") { - paste(paste0("\"", localDF[,i], "\""), collapse = " ") + paste(paste0("\"", localDF[, i], "\""), collapse = " ") } else { - paste(localDF[,i], collapse = " ") + paste(localDF[, i], collapse = " ") } # Add the corresponding number of spaces for alignment - spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse="") + spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse = "") # Get the short type. For 'character', it would be 'chr'; # 'for numeric', it's 'num', etc. @@ -2413,7 +2413,7 @@ setMethod("str", # Concatenate the colnames, coltypes, and first # elements of each column line <- paste0(" $ ", names[i], spaces, ": ", - dataType, " ",firstElements) + dataType, " ", firstElements) # Chop off extra characters if this is too long cat(substr(line, 1, MAX_CHAR_PER_ROW)) diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index a78fbb714f..35c4e6f1af 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, setMethod("show", "RDD", function(object) { - cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep="")) + cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = "")) }) setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) { diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 471bec1eac..b0e67c8ad2 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -103,7 +103,10 @@ parallelize <- function(sc, coll, numSlices = 1) { # TODO: bound/safeguard numSlices # TODO: unit tests for if the split works for all primitives # TODO: support matrix, data frame, etc + # nolint start + # suppress lintr warning: Place a space before left parenthesis, except in a function call. if ((!is.list(coll) && !is.vector(coll)) || is.data.frame(coll)) { + # nolint end if (is.data.frame(coll)) { message(paste("context.R: A data frame is parallelized by columns.")) } else { diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index d8a0393275..eefdf17873 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -186,7 +186,7 @@ readMultipleObjects <- function(inputCon) { # of the objects, so the number of objects varies, we try to read # all objects in a loop until the end of the stream. data <- list() - while(TRUE) { + while (TRUE) { # If reaching the end of the stream, type returned should be "". type <- readType(inputCon) if (type == "") { diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ddfa61717a..6ad71fcb46 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -607,7 +607,7 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") #' @rdname showDF #' @export -setGeneric("showDF", function(x,...) { standardGeneric("showDF") }) +setGeneric("showDF", function(x, ...) { standardGeneric("showDF") }) # @rdname subset # @export @@ -615,7 +615,7 @@ setGeneric("subset", function(x, ...) { standardGeneric("subset") }) #' @rdname agg #' @export -setGeneric("summarize", function(x,...) { standardGeneric("summarize") }) +setGeneric("summarize", function(x, ...) 
{ standardGeneric("summarize") }) #' @rdname summary #' @export diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 346f33d7da..5c0d3dcf3a 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -54,7 +54,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "DataFram function(formula, family = c("gaussian", "binomial"), data, lambda = 0, alpha = 0, standardize = TRUE, solver = "auto") { family <- match.arg(family) - formula <- paste(deparse(formula), collapse="") + formula <- paste(deparse(formula), collapse = "") model <- callJStatic("org.apache.spark.ml.api.r.SparkRWrappers", "fitRModelFormula", formula, data@sdf, family, lambda, alpha, standardize, solver) diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R index 70e87a93e6..3bbf60d9b6 100644 --- a/R/pkg/R/serialize.R +++ b/R/pkg/R/serialize.R @@ -100,7 +100,7 @@ writeJobj <- function(con, value) { writeString <- function(con, value) { utfVal <- enc2utf8(value) writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1)) - writeBin(utfVal, con, endian = "big", useBytes=TRUE) + writeBin(utfVal, con, endian = "big", useBytes = TRUE) } writeInt <- function(con, value) { diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 3e9eafc7f5..c187869fdf 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -153,7 +153,7 @@ sparkR.init <- function( if (!file.exists(path)) { stop("JVM is not ready after 10 seconds") } - f <- file(path, open="rb") + f <- file(path, open = "rb") backendPort <- readInt(f) monitorPort <- readInt(f) rLibPath <- readString(f) @@ -185,9 +185,9 @@ sparkR.init <- function( } sparkExecutorEnvMap <- convertNamedListToEnv(sparkExecutorEnv) - if(is.null(sparkExecutorEnvMap$LD_LIBRARY_PATH)) { + if (is.null(sparkExecutorEnvMap$LD_LIBRARY_PATH)) { sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- - paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH")) + paste0("$LD_LIBRARY_PATH:", Sys.getenv("LD_LIBRARY_PATH")) } # Classpath separator is ";" on Windows diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index aa386e5da9..fb6575cb42 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -158,7 +158,7 @@ wrapInt <- function(value) { # Multiply `val` by 31 and add `addVal` to the result. Ensures that # integer-overflows are handled at every step. mult31AndAdd <- function(val, addVal) { - vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + vec <- c(bitwShiftL(val, c(4, 3, 2, 1, 0)), addVal) Reduce(function(a, b) { wrapInt(as.numeric(a) + as.numeric(b)) }, @@ -202,7 +202,7 @@ serializeToString <- function(rdd) { # This function amortizes the allocation cost by doubling # the size of the list every time it fills up. 
addItemToAccumulator <- function(acc, item) { - if(acc$counter == acc$size) { + if (acc$counter == acc$size) { acc$size <- acc$size * 2 length(acc$data) <- acc$size } diff --git a/R/pkg/inst/profile/general.R b/R/pkg/inst/profile/general.R index c55fe9ba7a..8c75c19ca7 100644 --- a/R/pkg/inst/profile/general.R +++ b/R/pkg/inst/profile/general.R @@ -19,5 +19,5 @@ packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR") dirs <- strsplit(packageDir, ",")[[1]] .libPaths(c(dirs, .libPaths())) - Sys.setenv(NOAWT=1) + Sys.setenv(NOAWT = 1) } diff --git a/R/pkg/inst/tests/testthat/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R index 207a37a0cb..c26b28b78d 100644 --- a/R/pkg/inst/tests/testthat/packageInAJarTest.R +++ b/R/pkg/inst/tests/testthat/packageInAJarTest.R @@ -25,6 +25,6 @@ run2 <- myfunc(-4L) sparkR.stop() -if(run1 != 6) quit(save = "no", status = 1) +if (run1 != 6) quit(save = "no", status = 1) -if(run2 != -3) quit(save = "no", status = 1) +if (run2 != -3) quit(save = "no", status = 1) diff --git a/R/pkg/inst/tests/testthat/test_binaryFile.R b/R/pkg/inst/tests/testthat/test_binaryFile.R index f2452ed97d..976a7558a8 100644 --- a/R/pkg/inst/tests/testthat/test_binaryFile.R +++ b/R/pkg/inst/tests/testthat/test_binaryFile.R @@ -23,8 +23,8 @@ sc <- sparkR.init() mockFile <- c("Spark is pretty.", "Spark is awesome.") test_that("saveAsObjectFile()/objectFile() following textFile() works", { - fileName1 <- tempfile(pattern="spark-test", fileext=".tmp") - fileName2 <- tempfile(pattern="spark-test", fileext=".tmp") + fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") + fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) rdd <- textFile(sc, fileName1, 1) @@ -37,7 +37,7 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", { }) test_that("saveAsObjectFile()/objectFile() works on a parallelized list", { - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") l <- list(1, 2, 3) rdd <- parallelize(sc, l, 1) @@ -49,8 +49,8 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", { }) test_that("saveAsObjectFile()/objectFile() following RDD transformations works", { - fileName1 <- tempfile(pattern="spark-test", fileext=".tmp") - fileName2 <- tempfile(pattern="spark-test", fileext=".tmp") + fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") + fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) rdd <- textFile(sc, fileName1) @@ -73,8 +73,8 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works", }) test_that("saveAsObjectFile()/objectFile() works with multiple paths", { - fileName1 <- tempfile(pattern="spark-test", fileext=".tmp") - fileName2 <- tempfile(pattern="spark-test", fileext=".tmp") + fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") + fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") rdd1 <- parallelize(sc, "Spark is pretty.") saveAsObjectFile(rdd1, fileName1) diff --git a/R/pkg/inst/tests/testthat/test_binary_function.R b/R/pkg/inst/tests/testthat/test_binary_function.R index f054ac9a87..7bad4d2a7e 100644 --- a/R/pkg/inst/tests/testthat/test_binary_function.R +++ b/R/pkg/inst/tests/testthat/test_binary_function.R @@ -31,7 +31,7 @@ test_that("union on two RDDs", { actual <- collect(unionRDD(rdd, rdd)) expect_equal(actual, as.list(rep(nums, 2))) - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + 
fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) text.rdd <- textFile(sc, fileName) @@ -74,10 +74,10 @@ test_that("zipPartitions() on RDDs", { actual <- collect(zipPartitions(rdd1, rdd2, rdd3, func = function(x, y, z) { list(list(x, y, z))} )) expect_equal(actual, - list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6)))) + list(list(1, c(1, 2), c(1, 2, 3)), list(2, c(3, 4), c(4, 5, 6)))) mockFile <- c("Spark is pretty.", "Spark is awesome.") - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) rdd <- textFile(sc, fileName, 1) diff --git a/R/pkg/inst/tests/testthat/test_broadcast.R b/R/pkg/inst/tests/testthat/test_broadcast.R index bb86a5c922..8be6efc3db 100644 --- a/R/pkg/inst/tests/testthat/test_broadcast.R +++ b/R/pkg/inst/tests/testthat/test_broadcast.R @@ -25,7 +25,7 @@ nums <- 1:2 rrdd <- parallelize(sc, nums, 2L) test_that("using broadcast variable", { - randomMat <- matrix(nrow=10, ncol=10, data=rnorm(100)) + randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100)) randomMatBr <- broadcast(sc, randomMat) useBroadcast <- function(x) { @@ -37,7 +37,7 @@ test_that("using broadcast variable", { }) test_that("without using broadcast variable", { - randomMat <- matrix(nrow=10, ncol=10, data=rnorm(100)) + randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100)) useBroadcast <- function(x) { sum(randomMat * x) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index af84a0abcf..e120462964 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -96,9 +96,9 @@ test_that("summary coefficients match with native glm of family 'binomial'", { training <- filter(df, df$Species != "setosa") stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training, family = "binomial")) - coefs <- as.vector(stats$coefficients[,1]) + coefs <- as.vector(stats$coefficients[, 1]) - rTraining <- iris[iris$Species %in% c("versicolor","virginica"),] + rTraining <- iris[iris$Species %in% c("versicolor", "virginica"), ] rCoefs <- as.vector(coef(glm(Species ~ Sepal.Length + Sepal.Width, data = rTraining, family = binomial(link = "logit")))) diff --git a/R/pkg/inst/tests/testthat/test_rdd.R b/R/pkg/inst/tests/testthat/test_rdd.R index 1b3a22486e..3b0c16be5a 100644 --- a/R/pkg/inst/tests/testthat/test_rdd.R +++ b/R/pkg/inst/tests/testthat/test_rdd.R @@ -75,7 +75,7 @@ test_that("mapPartitions on RDD", { test_that("flatMap() on RDDs", { flat <- flatMap(intRdd, function(x) { list(x, x) }) actual <- collect(flat) - expect_equal(actual, rep(intPairs, each=2)) + expect_equal(actual, rep(intPairs, each = 2)) }) test_that("filterRDD on RDD", { @@ -245,9 +245,9 @@ test_that("mapValues() on pairwise RDDs", { }) test_that("flatMapValues() on pairwise RDDs", { - l <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4)))) + l <- parallelize(sc, list(list(1, c(1, 2)), list(2, c(3, 4)))) actual <- collect(flatMapValues(l, function(x) { x })) - expect_equal(actual, list(list(1,1), list(1,2), list(2,3), list(2,4))) + expect_equal(actual, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) # Generate x to x+1 for every value actual <- collect(flatMapValues(intRdd, function(x) { x: (x + 1) })) @@ -448,12 +448,12 @@ test_that("zipRDD() on RDDs", { list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004))) mockFile <- c("Spark is pretty.", "Spark is awesome.") - 
fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) rdd <- textFile(sc, fileName, 1) actual <- collect(zipRDD(rdd, rdd)) - expected <- lapply(mockFile, function(x) { list(x ,x) }) + expected <- lapply(mockFile, function(x) { list(x, x) }) expect_equal(actual, expected) rdd1 <- parallelize(sc, 0:1, 1) @@ -484,7 +484,7 @@ test_that("cartesian() on RDDs", { expect_equal(actual, list()) mockFile <- c("Spark is pretty.", "Spark is awesome.") - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) rdd <- textFile(sc, fileName) @@ -523,19 +523,19 @@ test_that("subtract() on RDDs", { # subtract by an empty RDD rdd2 <- parallelize(sc, list()) actual <- collect(subtract(rdd1, rdd2)) - expect_equal(as.list(sort(as.vector(actual, mode="integer"))), + expect_equal(as.list(sort(as.vector(actual, mode = "integer"))), l) rdd2 <- parallelize(sc, list(2, 4)) actual <- collect(subtract(rdd1, rdd2)) - expect_equal(as.list(sort(as.vector(actual, mode="integer"))), + expect_equal(as.list(sort(as.vector(actual, mode = "integer"))), list(1, 1, 3)) l <- list("a", "a", "b", "b", "c", "d") rdd1 <- parallelize(sc, l) rdd2 <- parallelize(sc, list("b", "d")) actual <- collect(subtract(rdd1, rdd2)) - expect_equal(as.list(sort(as.vector(actual, mode="character"))), + expect_equal(as.list(sort(as.vector(actual, mode = "character"))), list("a", "a", "c")) }) @@ -585,53 +585,53 @@ test_that("intersection() on RDDs", { }) test_that("join() on pairwise RDDs", { - rdd1 <- parallelize(sc, list(list(1,1), list(2,4))) - rdd2 <- parallelize(sc, list(list(1,2), list(1,3))) + rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) + rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) actual <- collect(join(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list(1, list(1, 2)), list(1, list(1, 3))))) - rdd1 <- parallelize(sc, list(list("a",1), list("b",4))) - rdd2 <- parallelize(sc, list(list("a",2), list("a",3))) + rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4))) + rdd2 <- parallelize(sc, list(list("a", 2), list("a", 3))) actual <- collect(join(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list("a", list(1, 2)), list("a", list(1, 3))))) - rdd1 <- parallelize(sc, list(list(1,1), list(2,2))) - rdd2 <- parallelize(sc, list(list(3,3), list(4,4))) + rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) + rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) actual <- collect(join(rdd1, rdd2, 2L)) expect_equal(actual, list()) - rdd1 <- parallelize(sc, list(list("a",1), list("b",2))) - rdd2 <- parallelize(sc, list(list("c",3), list("d",4))) + rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) + rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) actual <- collect(join(rdd1, rdd2, 2L)) expect_equal(actual, list()) }) test_that("leftOuterJoin() on pairwise RDDs", { - rdd1 <- parallelize(sc, list(list(1,1), list(2,4))) - rdd2 <- parallelize(sc, list(list(1,2), list(1,3))) + rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) + rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) - rdd1 <- parallelize(sc, list(list("a",1), list("b",4))) - rdd2 <- parallelize(sc, 
list(list("a",2), list("a",3))) + rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4))) + rdd2 <- parallelize(sc, list(list("a", 2), list("a", 3))) actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(4, NULL)), list("a", list(1, 2)), list("a", list(1, 3))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) - rdd1 <- parallelize(sc, list(list(1,1), list(2,2))) - rdd2 <- parallelize(sc, list(list(3,3), list(4,4))) + rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) + rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(1, NULL)), list(2, list(2, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) - rdd1 <- parallelize(sc, list(list("a",1), list("b",2))) - rdd2 <- parallelize(sc, list(list("c",3), list("d",4))) + rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) + rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(2, NULL)), list("a", list(1, NULL))) expect_equal(sortKeyValueList(actual), @@ -639,57 +639,57 @@ test_that("leftOuterJoin() on pairwise RDDs", { }) test_that("rightOuterJoin() on pairwise RDDs", { - rdd1 <- parallelize(sc, list(list(1,2), list(1,3))) - rdd2 <- parallelize(sc, list(list(1,1), list(2,4))) + rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3))) + rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) - rdd1 <- parallelize(sc, list(list("a",2), list("a",3))) - rdd2 <- parallelize(sc, list(list("a",1), list("b",4))) + rdd1 <- parallelize(sc, list(list("a", 2), list("a", 3))) + rdd2 <- parallelize(sc, list(list("a", 1), list("b", 4))) actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(NULL, 4)), list("a", list(2, 1)), list("a", list(3, 1))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) - rdd1 <- parallelize(sc, list(list(1,1), list(2,2))) - rdd2 <- parallelize(sc, list(list(3,3), list(4,4))) + rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) + rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list(3, list(NULL, 3)), list(4, list(NULL, 4))))) - rdd1 <- parallelize(sc, list(list("a",1), list("b",2))) - rdd2 <- parallelize(sc, list(list("c",3), list("d",4))) + rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) + rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list("d", list(NULL, 4)), list("c", list(NULL, 3))))) }) test_that("fullOuterJoin() on pairwise RDDs", { - rdd1 <- parallelize(sc, list(list(1,2), list(1,3), list(3,3))) - rdd2 <- parallelize(sc, list(list(1,1), list(2,4))) + rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3))) + rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4)), list(3, list(3, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) - rdd1 <- parallelize(sc, list(list("a",2), list("a",3), list("c", 1))) - rdd2 <- parallelize(sc, list(list("a",1), 
list("b",4))) + rdd1 <- parallelize(sc, list(list("a", 2), list("a", 3), list("c", 1))) + rdd2 <- parallelize(sc, list(list("a", 1), list("b", 4))) actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(NULL, 4)), list("a", list(2, 1)), list("a", list(3, 1)), list("c", list(1, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) - rdd1 <- parallelize(sc, list(list(1,1), list(2,2))) - rdd2 <- parallelize(sc, list(list(3,3), list(4,4))) + rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) + rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list(1, list(1, NULL)), list(2, list(2, NULL)), list(3, list(NULL, 3)), list(4, list(NULL, 4))))) - rdd1 <- parallelize(sc, list(list("a",1), list("b",2))) - rdd2 <- parallelize(sc, list(list("c",3), list("d",4))) + rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) + rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list("a", list(1, NULL)), list("b", list(2, NULL)), diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 11a8f12fd5..63acbadfa6 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -41,8 +41,8 @@ sqlContext <- sparkRSQL.init(sc) mockLines <- c("{\"name\":\"Michael\"}", "{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"Justin\", \"age\":19}") -jsonPath <- tempfile(pattern="sparkr-test", fileext=".tmp") -parquetPath <- tempfile(pattern="sparkr-test", fileext=".parquet") +jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") +parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet") writeLines(mockLines, jsonPath) # For test nafunctions, like dropna(), fillna(),... 
@@ -51,7 +51,7 @@ mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}", "{\"name\":\"David\",\"age\":60,\"height\":null}", "{\"name\":\"Amy\",\"age\":null,\"height\":null}", "{\"name\":null,\"age\":null,\"height\":null}") -jsonPathNa <- tempfile(pattern="sparkr-test", fileext=".tmp") +jsonPathNa <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLinesNa, jsonPathNa) # For test complex types in DataFrame @@ -59,7 +59,7 @@ mockLinesComplexType <- c("{\"c1\":[1, 2, 3], \"c2\":[\"a\", \"b\", \"c\"], \"c3\":[1.0, 2.0, 3.0]}", "{\"c1\":[4, 5, 6], \"c2\":[\"d\", \"e\", \"f\"], \"c3\":[4.0, 5.0, 6.0]}", "{\"c1\":[7, 8, 9], \"c2\":[\"g\", \"h\", \"i\"], \"c3\":[7.0, 8.0, 9.0]}") -complexTypeJsonPath <- tempfile(pattern="sparkr-test", fileext=".tmp") +complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLinesComplexType, complexTypeJsonPath) test_that("calling sparkRSQL.init returns existing SQL context", { @@ -151,9 +151,9 @@ test_that("create DataFrame from RDD", { expect_equal(as.list(collect(where(df2AsDF, df2AsDF$name == "Bob"))), list(name = "Bob", age = 16, height = 176.5)) - localDF <- data.frame(name=c("John", "Smith", "Sarah"), - age=c(19L, 23L, 18L), - height=c(176.5, 181.4, 173.7)) + localDF <- data.frame(name = c("John", "Smith", "Sarah"), + age = c(19L, 23L, 18L), + height = c(176.5, 181.4, 173.7)) df <- createDataFrame(sqlContext, localDF, schema) expect_is(df, "DataFrame") expect_equal(count(df), 3) @@ -263,7 +263,7 @@ test_that("create DataFrame from list or data.frame", { irisdf <- suppressWarnings(createDataFrame(sqlContext, iris)) iris_collected <- collect(irisdf) - expect_equivalent(iris_collected[,-5], iris[,-5]) + expect_equivalent(iris_collected[, -5], iris[, -5]) expect_equal(iris_collected$Species, as.character(iris$Species)) mtcarsdf <- createDataFrame(sqlContext, mtcars) @@ -329,7 +329,7 @@ test_that("create DataFrame from a data.frame with complex types", { mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}", "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}", "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}") -mapTypeJsonPath <- tempfile(pattern="sparkr-test", fileext=".tmp") +mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLinesMapType, mapTypeJsonPath) test_that("Collect DataFrame with complex types", { @@ -399,11 +399,11 @@ test_that("read/write json files", { expect_equal(count(df), 3) # Test write.df - jsonPath2 <- tempfile(pattern="jsonPath2", fileext=".json") - write.df(df, jsonPath2, "json", mode="overwrite") + jsonPath2 <- tempfile(pattern = "jsonPath2", fileext = ".json") + write.df(df, jsonPath2, "json", mode = "overwrite") # Test write.json - jsonPath3 <- tempfile(pattern="jsonPath3", fileext=".json") + jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json") write.json(df, jsonPath3) # Test read.json()/jsonFile() works with multiple input paths @@ -466,7 +466,7 @@ test_that("insertInto() on a registered table", { lines <- c("{\"name\":\"Bob\", \"age\":24}", "{\"name\":\"James\", \"age\":35}") - jsonPath2 <- tempfile(pattern="jsonPath2", fileext=".tmp") + jsonPath2 <- tempfile(pattern = "jsonPath2", fileext = ".tmp") parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") writeLines(lines, jsonPath2) df2 <- read.df(sqlContext, jsonPath2, "json") @@ -526,7 +526,7 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", { textLines <- c("Michael", "Andy, 
30", "Justin, 19") - textPath <- tempfile(pattern="sparkr-textLines", fileext=".tmp") + textPath <- tempfile(pattern = "sparkr-textLines", fileext = ".tmp") writeLines(textLines, textPath) textRDD <- textFile(sc, textPath) @@ -547,7 +547,7 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", { }) test_that("objectFile() works with row serialization", { - objectPath <- tempfile(pattern="spark-test", fileext=".tmp") + objectPath <- tempfile(pattern = "spark-test", fileext = ".tmp") df <- read.json(sqlContext, jsonPath) dfRDD <- toRDD(df) saveAsObjectFile(coalesce(dfRDD, 1L), objectPath) @@ -611,7 +611,7 @@ test_that("collect() support Unicode characters", { "{\"name\":\"こんにちは\", \"age\":19}", "{\"name\":\"Xin chào\"}") - jsonPath <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(lines, jsonPath) df <- read.df(sqlContext, jsonPath, "json") @@ -705,7 +705,7 @@ test_that("names() colnames() set the column names", { # Test base::colnames base::names m2 <- cbind(1, 1:4) expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2")) - colnames(m2) <- c("x","Y") + colnames(m2) <- c("x", "Y") expect_equal(colnames(m2), c("x", "Y")) z <- list(a = 1, b = "c", c = 1:3) @@ -745,7 +745,7 @@ test_that("distinct(), unique() and dropDuplicates() on DataFrames", { "{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"Justin\", \"age\":19}", "{\"name\":\"Justin\", \"age\":19}") - jsonPathWithDup <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPathWithDup <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(lines, jsonPathWithDup) df <- read.json(sqlContext, jsonPathWithDup) @@ -774,7 +774,7 @@ test_that("distinct(), unique() and dropDuplicates() on DataFrames", { c(2, 2, 1), c(2, 2, 2)) names(expected) <- c("key", "value1", "value2") expect_equivalent( - result[order(result$key, result$value1, result$value2),], + result[order(result$key, result$value1, result$value2), ], expected) result <- collect(dropDuplicates(df, c("key", "value1"))) @@ -782,7 +782,7 @@ test_that("distinct(), unique() and dropDuplicates() on DataFrames", { c(1, 1, 1), c(1, 2, 1), c(2, 1, 2), c(2, 2, 2)) names(expected) <- c("key", "value1", "value2") expect_equivalent( - result[order(result$key, result$value1, result$value2),], + result[order(result$key, result$value1, result$value2), ], expected) result <- collect(dropDuplicates(df, "key")) @@ -790,7 +790,7 @@ test_that("distinct(), unique() and dropDuplicates() on DataFrames", { c(1, 1, 1), c(2, 1, 2)) names(expected) <- c("key", "value1", "value2") expect_equivalent( - result[order(result$key, result$value1, result$value2),], + result[order(result$key, result$value1, result$value2), ], expected) }) @@ -822,10 +822,10 @@ test_that("select operators", { expect_is(df[[2]], "Column") expect_is(df[["age"]], "Column") - expect_is(df[,1], "DataFrame") - expect_equal(columns(df[,1]), c("name")) - expect_equal(columns(df[,"age"]), c("age")) - df2 <- df[,c("age", "name")] + expect_is(df[, 1], "DataFrame") + expect_equal(columns(df[, 1]), c("name")) + expect_equal(columns(df[, "age"]), c("age")) + df2 <- df[, c("age", "name")] expect_is(df2, "DataFrame") expect_equal(columns(df2), c("age", "name")) @@ -884,7 +884,7 @@ test_that("drop column", { test_that("subsetting", { # read.json returns columns in random order df <- select(read.json(sqlContext, jsonPath), "name", "age") - filtered <- df[df$age > 20,] + filtered <- df[df$age > 20, ] expect_equal(count(filtered), 1) 
expect_equal(columns(filtered), c("name", "age")) expect_equal(collect(filtered)$name, "Andy") @@ -903,11 +903,11 @@ test_that("subsetting", { expect_equal(count(df4), 2) expect_equal(columns(df4), c("name", "age")) - df5 <- df[df$age %in% c(19), c(1,2)] + df5 <- df[df$age %in% c(19), c(1, 2)] expect_equal(count(df5), 1) expect_equal(columns(df5), c("name", "age")) - df6 <- subset(df, df$age %in% c(30), c(1,2)) + df6 <- subset(df, df$age %in% c(30), c(1, 2)) expect_equal(count(df6), 1) expect_equal(columns(df6), c("name", "age")) @@ -959,22 +959,22 @@ test_that("test HiveContext", { expect_is(df2, "DataFrame") expect_equal(count(df2), 3) - jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPath2 <- tempfile(pattern = "sparkr-test", fileext = ".tmp") invisible(saveAsTable(df, "json2", "json", "append", path = jsonPath2)) df3 <- sql(hiveCtx, "select * from json2") expect_is(df3, "DataFrame") expect_equal(count(df3), 3) unlink(jsonPath2) - hivetestDataPath <- tempfile(pattern="sparkr-test", fileext=".tmp") + hivetestDataPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") invisible(saveAsTable(df, "hivetestbl", path = hivetestDataPath)) df4 <- sql(hiveCtx, "select * from hivetestbl") expect_is(df4, "DataFrame") expect_equal(count(df4), 3) unlink(hivetestDataPath) - parquetDataPath <- tempfile(pattern="sparkr-test", fileext=".tmp") - invisible(saveAsTable(df, "parquetest", "parquet", mode="overwrite", path=parquetDataPath)) + parquetDataPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") + invisible(saveAsTable(df, "parquetest", "parquet", mode = "overwrite", path = parquetDataPath)) df5 <- sql(hiveCtx, "select * from parquetest") expect_is(df5, "DataFrame") expect_equal(count(df5), 3) @@ -1094,7 +1094,7 @@ test_that("column binary mathfunctions", { "{\"a\":2, \"b\":6}", "{\"a\":3, \"b\":7}", "{\"a\":4, \"b\":8}") - jsonPathWithDup <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPathWithDup <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(lines, jsonPathWithDup) df <- read.json(sqlContext, jsonPathWithDup) expect_equal(collect(select(df, atan2(df$a, df$b)))[1, "ATAN2(a, b)"], atan2(1, 5)) @@ -1244,7 +1244,7 @@ test_that("group by, agg functions", { df3 <- agg(gd, age = "stddev") expect_is(df3, "DataFrame") df3_local <- collect(df3) - expect_true(is.nan(df3_local[df3_local$name == "Andy",][1, 2])) + expect_true(is.nan(df3_local[df3_local$name == "Andy", ][1, 2])) df4 <- agg(gd, sumAge = sum(df$age)) expect_is(df4, "DataFrame") @@ -1264,34 +1264,34 @@ test_that("group by, agg functions", { "{\"name\":\"ID1\", \"value\": \"10\"}", "{\"name\":\"ID1\", \"value\": \"22\"}", "{\"name\":\"ID2\", \"value\": \"-3\"}") - jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPath2 <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLines2, jsonPath2) gd2 <- groupBy(read.json(sqlContext, jsonPath2), "name") df6 <- agg(gd2, value = "sum") df6_local <- collect(df6) - expect_equal(42, df6_local[df6_local$name == "ID1",][1, 2]) - expect_equal(-3, df6_local[df6_local$name == "ID2",][1, 2]) + expect_equal(42, df6_local[df6_local$name == "ID1", ][1, 2]) + expect_equal(-3, df6_local[df6_local$name == "ID2", ][1, 2]) df7 <- agg(gd2, value = "stddev") df7_local <- collect(df7) - expect_true(abs(df7_local[df7_local$name == "ID1",][1, 2] - 6.928203) < 1e-6) - expect_true(is.nan(df7_local[df7_local$name == "ID2",][1, 2])) + expect_true(abs(df7_local[df7_local$name == "ID1", ][1, 2] - 6.928203) < 1e-6) + 
expect_true(is.nan(df7_local[df7_local$name == "ID2", ][1, 2])) mockLines3 <- c("{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"Justin\", \"age\":19}", "{\"name\":\"Justin\", \"age\":1}") - jsonPath3 <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPath3 <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLines3, jsonPath3) df8 <- read.json(sqlContext, jsonPath3) gd3 <- groupBy(df8, "name") gd3_local <- collect(sum(gd3)) - expect_equal(60, gd3_local[gd3_local$name == "Andy",][1, 2]) - expect_equal(20, gd3_local[gd3_local$name == "Justin",][1, 2]) + expect_equal(60, gd3_local[gd3_local$name == "Andy", ][1, 2]) + expect_equal(20, gd3_local[gd3_local$name == "Justin", ][1, 2]) expect_true(abs(collect(agg(df, sd(df$age)))[1, 1] - 7.778175) < 1e-6) gd3_local <- collect(agg(gd3, var(df8$age))) - expect_equal(162, gd3_local[gd3_local$name == "Justin",][1, 2]) + expect_equal(162, gd3_local[gd3_local$name == "Justin", ][1, 2]) # Test stats::sd, stats::var are working expect_true(abs(sd(1:2) - 0.7071068) < 1e-6) @@ -1304,10 +1304,10 @@ test_that("group by, agg functions", { test_that("arrange() and orderBy() on a DataFrame", { df <- read.json(sqlContext, jsonPath) sorted <- arrange(df, df$age) - expect_equal(collect(sorted)[1,2], "Michael") + expect_equal(collect(sorted)[1, 2], "Michael") sorted2 <- arrange(df, "name", decreasing = FALSE) - expect_equal(collect(sorted2)[2,"age"], 19) + expect_equal(collect(sorted2)[2, "age"], 19) sorted3 <- orderBy(df, asc(df$age)) expect_true(is.na(first(sorted3)$age)) @@ -1315,16 +1315,16 @@ test_that("arrange() and orderBy() on a DataFrame", { sorted4 <- orderBy(df, desc(df$name)) expect_equal(first(sorted4)$name, "Michael") - expect_equal(collect(sorted4)[3,"name"], "Andy") + expect_equal(collect(sorted4)[3, "name"], "Andy") sorted5 <- arrange(df, "age", "name", decreasing = TRUE) - expect_equal(collect(sorted5)[1,2], "Andy") + expect_equal(collect(sorted5)[1, 2], "Andy") - sorted6 <- arrange(df, "age","name", decreasing = c(T, F)) - expect_equal(collect(sorted6)[1,2], "Andy") + sorted6 <- arrange(df, "age", "name", decreasing = c(T, F)) + expect_equal(collect(sorted6)[1, 2], "Andy") sorted7 <- arrange(df, "name", decreasing = FALSE) - expect_equal(collect(sorted7)[2,"age"], 19) + expect_equal(collect(sorted7)[2, "age"], 19) }) test_that("filter() on a DataFrame", { @@ -1357,7 +1357,7 @@ test_that("join() and merge() on a DataFrame", { "{\"name\":\"Andy\", \"test\": \"no\"}", "{\"name\":\"Justin\", \"test\": \"yes\"}", "{\"name\":\"Bob\", \"test\": \"yes\"}") - jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPath2 <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLines2, jsonPath2) df2 <- read.json(sqlContext, jsonPath2) @@ -1409,12 +1409,12 @@ test_that("join() and merge() on a DataFrame", { expect_equal(names(merged), c("age", "name_x", "name_y", "test")) expect_equal(collect(orderBy(merged, merged$name_x))$age[3], 19) - merged <- merge(df, df2, suffixes = c("-X","-Y")) + merged <- merge(df, df2, suffixes = c("-X", "-Y")) expect_equal(count(merged), 3) expect_equal(names(merged), c("age", "name-X", "name-Y", "test")) expect_equal(collect(orderBy(merged, merged$"name-X"))$age[1], 30) - merged <- merge(df, df2, by = "name", suffixes = c("-X","-Y"), sort = FALSE) + merged <- merge(df, df2, by = "name", suffixes = c("-X", "-Y"), sort = FALSE) expect_equal(count(merged), 3) expect_equal(names(merged), c("age", "name-X", "name-Y", "test")) 
expect_equal(collect(orderBy(merged, merged$"name-Y"))$"name-X"[3], "Michael") @@ -1432,7 +1432,7 @@ test_that("join() and merge() on a DataFrame", { "{\"name\":\"Andy\", \"name_y\":\"Andy\", \"test\": \"no\"}", "{\"name\":\"Justin\", \"name_y\":\"Justin\", \"test\": \"yes\"}", "{\"name\":\"Bob\", \"name_y\":\"Bob\", \"test\": \"yes\"}") - jsonPath3 <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPath3 <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLines3, jsonPath3) df3 <- read.json(sqlContext, jsonPath3) expect_error(merge(df, df3), @@ -1460,8 +1460,8 @@ test_that("showDF()", { "|null|Michael|\n", "| 30| Andy|\n", "| 19| Justin|\n", - "+----+-------+\n", sep="") - expect_output(s , expected) + "+----+-------+\n", sep = "") + expect_output(s, expected) }) test_that("isLocal()", { @@ -1475,7 +1475,7 @@ test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", { lines <- c("{\"name\":\"Bob\", \"age\":24}", "{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"James\", \"age\":35}") - jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp") + jsonPath2 <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(lines, jsonPath2) df2 <- read.df(sqlContext, jsonPath2, "json") @@ -1558,7 +1558,7 @@ test_that("mutate(), transform(), rename() and names()", { test_that("read/write Parquet files", { df <- read.df(sqlContext, jsonPath, "json") # Test write.df and read.df - write.df(df, parquetPath, "parquet", mode="overwrite") + write.df(df, parquetPath, "parquet", mode = "overwrite") df2 <- read.df(sqlContext, parquetPath, "parquet") expect_is(df2, "DataFrame") expect_equal(count(df2), 3) @@ -1593,7 +1593,7 @@ test_that("read/write text files", { expect_equal(colnames(df), c("value")) expect_equal(count(df), 3) textPath <- tempfile(pattern = "textPath", fileext = ".txt") - write.df(df, textPath, "text", mode="overwrite") + write.df(df, textPath, "text", mode = "overwrite") # Test write.text and read.text textPath2 <- tempfile(pattern = "textPath2", fileext = ".txt") @@ -1631,13 +1631,13 @@ test_that("dropna() and na.omit() on a DataFrame", { # drop with columns - expected <- rows[!is.na(rows$name),] + expected <- rows[!is.na(rows$name), ] actual <- collect(dropna(df, cols = "name")) expect_identical(expected, actual) actual <- collect(na.omit(df, cols = "name")) expect_identical(expected, actual) - expected <- rows[!is.na(rows$age),] + expected <- rows[!is.na(rows$age), ] actual <- collect(dropna(df, cols = "age")) row.names(expected) <- row.names(actual) # identical on two dataframes does not work here. Don't know why. 
@@ -1647,13 +1647,13 @@ test_that("dropna() and na.omit() on a DataFrame", { expect_identical(expected$name, actual$name) actual <- collect(na.omit(df, cols = "age")) - expected <- rows[!is.na(rows$age) & !is.na(rows$height),] + expected <- rows[!is.na(rows$age) & !is.na(rows$height), ] actual <- collect(dropna(df, cols = c("age", "height"))) expect_identical(expected, actual) actual <- collect(na.omit(df, cols = c("age", "height"))) expect_identical(expected, actual) - expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),] + expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name), ] actual <- collect(dropna(df)) expect_identical(expected, actual) actual <- collect(na.omit(df)) @@ -1661,31 +1661,31 @@ test_that("dropna() and na.omit() on a DataFrame", { # drop with how - expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),] + expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name), ] actual <- collect(dropna(df)) expect_identical(expected, actual) actual <- collect(na.omit(df)) expect_identical(expected, actual) - expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),] + expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name), ] actual <- collect(dropna(df, "all")) expect_identical(expected, actual) actual <- collect(na.omit(df, "all")) expect_identical(expected, actual) - expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),] + expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name), ] actual <- collect(dropna(df, "any")) expect_identical(expected, actual) actual <- collect(na.omit(df, "any")) expect_identical(expected, actual) - expected <- rows[!is.na(rows$age) & !is.na(rows$height),] + expected <- rows[!is.na(rows$age) & !is.na(rows$height), ] actual <- collect(dropna(df, "any", cols = c("age", "height"))) expect_identical(expected, actual) actual <- collect(na.omit(df, "any", cols = c("age", "height"))) expect_identical(expected, actual) - expected <- rows[!is.na(rows$age) | !is.na(rows$height),] + expected <- rows[!is.na(rows$age) | !is.na(rows$height), ] actual <- collect(dropna(df, "all", cols = c("age", "height"))) expect_identical(expected, actual) actual <- collect(na.omit(df, "all", cols = c("age", "height"))) @@ -1693,7 +1693,7 @@ test_that("dropna() and na.omit() on a DataFrame", { # drop with threshold - expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) >= 2,] + expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) >= 2, ] actual <- collect(dropna(df, minNonNulls = 2, cols = c("age", "height"))) expect_identical(expected, actual) actual <- collect(na.omit(df, minNonNulls = 2, cols = c("age", "height"))) @@ -1701,7 +1701,7 @@ test_that("dropna() and na.omit() on a DataFrame", { expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) + - as.integer(!is.na(rows$name)) >= 3,] + as.integer(!is.na(rows$name)) >= 3, ] actual <- collect(dropna(df, minNonNulls = 3, cols = c("name", "age", "height"))) expect_identical(expected, actual) actual <- collect(na.omit(df, minNonNulls = 3, cols = c("name", "age", "height"))) @@ -1754,7 +1754,7 @@ test_that("crosstab() on a DataFrame", { }) df <- toDF(rdd, list("a", "b")) ct <- crosstab(df, "a", "b") - ordered <- ct[order(ct$a_b),] + ordered <- ct[order(ct$a_b), ] row.names(ordered) <- NULL expected <- data.frame("a_b" = c("a0", "a1", "a2"), "b0" = c(1, 0, 1), "b1" = c(1, 1, 0), stringsAsFactors 
= FALSE, row.names = NULL) @@ -1782,10 +1782,10 @@ test_that("freqItems() on a DataFrame", { negDoubles = input * -1.0, stringsAsFactors = F) rdf[ input %% 3 == 0, ] <- c(1, "1", -1) df <- createDataFrame(sqlContext, rdf) - multiColResults <- freqItems(df, c("numbers", "letters"), support=0.1) + multiColResults <- freqItems(df, c("numbers", "letters"), support = 0.1) expect_true(1 %in% multiColResults$numbers[[1]]) expect_true("1" %in% multiColResults$letters[[1]]) - singleColResult <- freqItems(df, "negDoubles", support=0.1) + singleColResult <- freqItems(df, "negDoubles", support = 0.1) expect_true(-1 %in% head(singleColResult$negDoubles)[[1]]) l <- lapply(c(0:99), function(i) { @@ -1860,9 +1860,9 @@ test_that("with() on a DataFrame", { test_that("Method coltypes() to get and set R's data types of a DataFrame", { expect_equal(coltypes(irisDF), c(rep("numeric", 4), "character")) - data <- data.frame(c1=c(1,2,3), - c2=c(T,F,T), - c3=c("2015/01/01 10:00:00", "2015/01/02 10:00:00", "2015/01/03 10:00:00")) + data <- data.frame(c1 = c(1, 2, 3), + c2 = c(T, F, T), + c3 = c("2015/01/01 10:00:00", "2015/01/02 10:00:00", "2015/01/03 10:00:00")) schema <- structType(structField("c1", "byte"), structField("c3", "boolean"), @@ -1874,7 +1874,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", { # Test complex types x <- createDataFrame(sqlContext, list(list(as.environment( - list("a"="b", "c"="d", "e"="f"))))) + list("a" = "b", "c" = "d", "e" = "f"))))) expect_equal(coltypes(x), "map") df <- selectExpr(read.json(sqlContext, jsonPath), "name", "(age * 1.21) as age") @@ -1918,7 +1918,7 @@ test_that("Method str()", { # the number of columns. Therefore, it will suffice to check for the # number of returned rows x <- runif(200, 1, 10) - df <- data.frame(t(as.matrix(data.frame(x,x,x,x,x,x,x,x,x)))) + df <- data.frame(t(as.matrix(data.frame(x, x, x, x, x, x, x, x, x)))) DF <- createDataFrame(sqlContext, df) out <- capture.output(str(DF)) expect_equal(length(out), 103) diff --git a/R/pkg/inst/tests/testthat/test_textFile.R b/R/pkg/inst/tests/testthat/test_textFile.R index a9cf83dbdb..e64ef1bb31 100644 --- a/R/pkg/inst/tests/testthat/test_textFile.R +++ b/R/pkg/inst/tests/testthat/test_textFile.R @@ -23,7 +23,7 @@ sc <- sparkR.init() mockFile <- c("Spark is pretty.", "Spark is awesome.") test_that("textFile() on a local file returns an RDD", { - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) rdd <- textFile(sc, fileName) @@ -35,7 +35,7 @@ test_that("textFile() on a local file returns an RDD", { }) test_that("textFile() followed by a collect() returns the same content", { - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) rdd <- textFile(sc, fileName) @@ -45,7 +45,7 @@ test_that("textFile() followed by a collect() returns the same content", { }) test_that("textFile() word count works as expected", { - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) rdd <- textFile(sc, fileName) @@ -63,7 +63,7 @@ test_that("textFile() word count works as expected", { }) test_that("several transformations on RDD created by textFile()", { - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, 
fileName) rdd <- textFile(sc, fileName) # RDD @@ -77,8 +77,8 @@ test_that("several transformations on RDD created by textFile()", { }) test_that("textFile() followed by a saveAsTextFile() returns the same content", { - fileName1 <- tempfile(pattern="spark-test", fileext=".tmp") - fileName2 <- tempfile(pattern="spark-test", fileext=".tmp") + fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") + fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) rdd <- textFile(sc, fileName1, 1L) @@ -91,7 +91,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content", }) test_that("saveAsTextFile() on a parallelized list works as expected", { - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") l <- list(1, 2, 3) rdd <- parallelize(sc, l, 1L) saveAsTextFile(rdd, fileName) @@ -102,8 +102,8 @@ test_that("saveAsTextFile() on a parallelized list works as expected", { }) test_that("textFile() and saveAsTextFile() word count works as expected", { - fileName1 <- tempfile(pattern="spark-test", fileext=".tmp") - fileName2 <- tempfile(pattern="spark-test", fileext=".tmp") + fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") + fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) rdd <- textFile(sc, fileName1) @@ -127,8 +127,8 @@ test_that("textFile() and saveAsTextFile() word count works as expected", { }) test_that("textFile() on multiple paths", { - fileName1 <- tempfile(pattern="spark-test", fileext=".tmp") - fileName2 <- tempfile(pattern="spark-test", fileext=".tmp") + fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") + fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines("Spark is pretty.", fileName1) writeLines("Spark is awesome.", fileName2) @@ -140,7 +140,7 @@ test_that("textFile() on multiple paths", { }) test_that("Pipelined operations on RDDs created using textFile", { - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) rdd <- textFile(sc, fileName) diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R index 56f14a3bce..4218138f64 100644 --- a/R/pkg/inst/tests/testthat/test_utils.R +++ b/R/pkg/inst/tests/testthat/test_utils.R @@ -41,7 +41,7 @@ test_that("convertJListToRList() gives back (deserializes) the original JLists test_that("serializeToBytes on RDD", { # File content mockFile <- c("Spark is pretty.", "Spark is awesome.") - fileName <- tempfile(pattern="spark-test", fileext=".tmp") + fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) text.rdd <- textFile(sc, fileName) @@ -86,8 +86,8 @@ test_that("cleanClosure on R functions", { f <- function(x) { defUse <- base::as.integer(x) + 1 # Test for access operators `::`. lapply(x, g) + 1 # Test for capturing function call "g"'s closure as a argument of lapply. - l$field[1,1] <- 3 # Test for access operators `$`. - res <- defUse + l$field[1,] # Test for def-use chain of "defUse", and "" symbol. + l$field[1, 1] <- 3 # Test for access operators `$`. + res <- defUse + l$field[1, ] # Test for def-use chain of "defUse", and "" symbol. f(res) # Test for recursive calls. } newF <- cleanClosure(f) @@ -132,7 +132,7 @@ test_that("cleanClosure on R functions", { expect_equal(actual, expected) # Test for broadcast variables. 
- a <- matrix(nrow=10, ncol=10, data=rnorm(100)) + a <- matrix(nrow = 10, ncol = 10, data = rnorm(100)) aBroadcast <- broadcast(sc, a) normMultiply <- function(x) { norm(aBroadcast$value) * x } newnormMultiply <- SparkR:::cleanClosure(normMultiply) diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index 3ae072beca..b6784dbae3 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -55,7 +55,7 @@ serializer <- SparkR:::readString(inputCon) # Include packages as required packageNames <- unserialize(SparkR:::readRaw(inputCon)) for (pkg in packageNames) { - suppressPackageStartupMessages(library(as.character(pkg), character.only=TRUE)) + suppressPackageStartupMessages(library(as.character(pkg), character.only = TRUE)) } # read function dependencies -- cgit v1.2.3
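For reference, nearly every hunk in this patch applies one of three recurring lint-r fixes: spaces around `=` in named arguments, a space after commas (including `[i, ]` data-frame indexing), and a space between control-flow keywords such as `if`/`while` and the opening parenthesis; `context.R` additionally wraps one line in `# nolint start`/`# nolint end` to silence a false positive. The snippet below is a minimal, self-contained sketch of the same rules on neutral base-R code — the lintr rule names in the comments are an assumption about which linters fire, not taken from the patch.

```r
# Space after "," and around "=" in named arguments
# (lintr: commas_linter / infix_spaces_linter, assumed):
# before: tempfile(pattern="spark-test", fileext=".tmp")
fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")

# Space between a control-flow keyword and "(" — function calls stay tight
# (lintr: spaces_left_parentheses_linter, assumed):
# before: while(x < 3) { ... }
x <- 0
while (x < 3) {
  x <- x + 1
}

# Space after the comma when indexing with an empty margin:
# before: iris[1,]
firstRow <- iris[1, ]

# Region-level suppression when a rule misfires, as context.R does here:
# nolint start
if ((!is.list(x) && !is.vector(x)) || is.data.frame(x)) {
# nolint end
  stop("expected a list or vector")
}
```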