From c34b546d674ce186f13d9999b97977bc281cfedf Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Tue, 16 Aug 2016 11:19:18 -0700 Subject: [SPARK-16519][SPARKR] Handle SparkR RDD generics that create warnings in R CMD check ## What changes were proposed in this pull request? Rename RDD functions for now to avoid CRAN check warnings. Some RDD functions are sharing generics with DataFrame functions (hence the problem) so after the renames we need to add new generics, for now. ## How was this patch tested? unit tests Author: Felix Cheung Closes #14626 from felixcheung/rrddfunctions. --- R/pkg/R/RDD.R | 100 ++++++------ R/pkg/R/SQLContext.R | 2 +- R/pkg/R/context.R | 2 +- R/pkg/R/generics.R | 91 +++++++---- R/pkg/R/pairRDD.R | 40 ++--- R/pkg/inst/tests/testthat/test_binaryFile.R | 8 +- R/pkg/inst/tests/testthat/test_binary_function.R | 18 +-- R/pkg/inst/tests/testthat/test_broadcast.R | 4 +- R/pkg/inst/tests/testthat/test_context.R | 6 +- R/pkg/inst/tests/testthat/test_includePackage.R | 4 +- .../inst/tests/testthat/test_parallelize_collect.R | 26 ++-- R/pkg/inst/tests/testthat/test_rdd.R | 172 ++++++++++----------- R/pkg/inst/tests/testthat/test_shuffle.R | 34 ++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 28 ++-- R/pkg/inst/tests/testthat/test_take.R | 32 ++-- R/pkg/inst/tests/testthat/test_textFile.R | 26 ++-- R/pkg/inst/tests/testthat/test_utils.R | 6 +- 17 files changed, 312 insertions(+), 287 deletions(-) (limited to 'R') diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 72a8052565..6b254bb0d3 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,7 +67,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, .Object }) -setMethod("show", "RDD", +setMethod("showRDD", "RDD", function(object) { cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = "")) }) @@ -215,7 +215,7 @@ setValidity("RDD", #' @rdname cache-methods #' @aliases cache,RDD-method #' @noRd -setMethod("cache", +setMethod("cacheRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "cache") @@ -235,12 +235,12 @@ setMethod("cache", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") +#' persistRDD(rdd, "MEMORY_AND_DISK") #'} #' @rdname persist #' @aliases persist,RDD-method #' @noRd -setMethod("persist", +setMethod("persistRDD", signature(x = "RDD", newLevel = "character"), function(x, newLevel = "MEMORY_ONLY") { callJMethod(getJRDD(x), "persist", getStorageLevel(newLevel)) @@ -259,12 +259,12 @@ setMethod("persist", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) #' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE +#' unpersistRDD(rdd) # rdd@@env$isCached == FALSE #'} #' @rdname unpersist-methods #' @aliases unpersist,RDD-method #' @noRd -setMethod("unpersist", +setMethod("unpersistRDD", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "unpersist") @@ -345,13 +345,13 @@ setMethod("numPartitions", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10, 2L) -#' collect(rdd) # list from 1 to 10 +#' collectRDD(rdd) # list from 1 to 10 #' collectPartition(rdd, 0L) # list from 1 to 5 #'} #' @rdname collect-methods #' @aliases collect,RDD-method #' @noRd -setMethod("collect", +setMethod("collectRDD", signature(x = "RDD"), function(x, flatten = TRUE) { # Assumes a pairwise RDD is backed by a JavaPairRDD. @@ -397,7 +397,7 @@ setMethod("collectPartition", setMethod("collectAsMap", signature(x = "RDD"), function(x) { - pairList <- collect(x) + pairList <- collectRDD(x) map <- new.env() lapply(pairList, function(i) { assign(as.character(i[[1]]), i[[2]], envir = map) }) as.list(map) @@ -411,30 +411,30 @@ setMethod("collectAsMap", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) -#' count(rdd) # 10 +#' countRDD(rdd) # 10 #' length(rdd) # Same as count #'} #' @rdname count #' @aliases count,RDD-method #' @noRd -setMethod("count", +setMethod("countRDD", signature(x = "RDD"), function(x) { countPartition <- function(part) { as.integer(length(part)) } valsRDD <- lapplyPartition(x, countPartition) - vals <- collect(valsRDD) + vals <- collectRDD(valsRDD) sum(as.integer(vals)) }) #' Return the number of elements in the RDD #' @rdname count #' @noRd -setMethod("length", +setMethod("lengthRDD", signature(x = "RDD"), function(x) { - count(x) + countRDD(x) }) #' Return the count of each unique value in this RDD as a list of @@ -460,7 +460,7 @@ setMethod("countByValue", signature(x = "RDD"), function(x) { ones <- lapply(x, function(item) { list(item, 1L) }) - collect(reduceByKey(ones, `+`, getNumPartitions(x))) + collectRDD(reduceByKey(ones, `+`, getNumPartitions(x))) }) #' Apply a function to all elements @@ -479,7 +479,7 @@ setMethod("countByValue", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) #' multiplyByTwo <- lapply(rdd, function(x) { x * 2 }) -#' collect(multiplyByTwo) # 2,4,6... +#' collectRDD(multiplyByTwo) # 2,4,6... #'} setMethod("lapply", signature(X = "RDD", FUN = "function"), @@ -512,7 +512,7 @@ setMethod("map", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) #' multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) }) -#' collect(multiplyByTwo) # 2,20,4,40,6,60... +#' collectRDD(multiplyByTwo) # 2,20,4,40,6,60... #'} #' @rdname flatMap #' @aliases flatMap,RDD,function-method @@ -541,7 +541,7 @@ setMethod("flatMap", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) #' partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) }) -#' collect(partitionSum) # 15, 40 +#' collectRDD(partitionSum) # 15, 40 #'} #' @rdname lapplyPartition #' @aliases lapplyPartition,RDD,function-method @@ -576,7 +576,7 @@ setMethod("mapPartitions", #' rdd <- parallelize(sc, 1:10, 5L) #' prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) { #' partIndex * Reduce("+", part) }) -#' collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76 +#' collectRDD(prod, flatten = FALSE) # 0, 7, 22, 45, 76 #'} #' @rdname lapplyPartitionsWithIndex #' @aliases lapplyPartitionsWithIndex,RDD,function-method @@ -607,7 +607,7 @@ setMethod("mapPartitionsWithIndex", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) -#' unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) +#' unlist(collectRDD(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) #'} # nolint end #' @rdname filterRDD @@ -656,7 +656,7 @@ setMethod("reduce", Reduce(func, part) } - partitionList <- collect(lapplyPartition(x, reducePartition), + partitionList <- collectRDD(lapplyPartition(x, reducePartition), flatten = FALSE) Reduce(func, partitionList) }) @@ -736,7 +736,7 @@ setMethod("foreach", lapply(x, func) NULL } - invisible(collect(mapPartitions(x, partition.func))) + invisible(collectRDD(mapPartitions(x, partition.func))) }) #' Applies a function to each partition in an RDD, and forces evaluation. @@ -753,7 +753,7 @@ setMethod("foreach", setMethod("foreachPartition", signature(x = "RDD", func = "function"), function(x, func) { - invisible(collect(mapPartitions(x, func))) + invisible(collectRDD(mapPartitions(x, func))) }) #' Take elements from an RDD. @@ -768,13 +768,13 @@ setMethod("foreachPartition", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) -#' take(rdd, 2L) # list(1, 2) +#' takeRDD(rdd, 2L) # list(1, 2) #'} # nolint end #' @rdname take #' @aliases take,RDD,numeric-method #' @noRd -setMethod("take", +setMethod("takeRDD", signature(x = "RDD", num = "numeric"), function(x, num) { resList <- list() @@ -817,13 +817,13 @@ setMethod("take", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) -#' first(rdd) +#' firstRDD(rdd) #' } #' @noRd -setMethod("first", +setMethod("firstRDD", signature(x = "RDD"), function(x) { - take(x, 1)[[1]] + takeRDD(x, 1)[[1]] }) #' Removes the duplicates from RDD. @@ -838,13 +838,13 @@ setMethod("first", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, c(1,2,2,3,3,3)) -#' sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3) +#' sort(unlist(collectRDD(distinctRDD(rdd)))) # c(1, 2, 3) #'} # nolint end #' @rdname distinct #' @aliases distinct,RDD-method #' @noRd -setMethod("distinct", +setMethod("distinctRDD", signature(x = "RDD"), function(x, numPartitions = SparkR:::getNumPartitions(x)) { identical.mapped <- lapply(x, function(x) { list(x, NULL) }) @@ -868,8 +868,8 @@ setMethod("distinct", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) -#' collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements -#' collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates +#' collectRDD(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements +#' collectRDD(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates #'} #' @rdname sampleRDD #' @aliases sampleRDD,RDD @@ -942,7 +942,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", fraction <- 0.0 total <- 0 multiplier <- 3.0 - initialCount <- count(x) + initialCount <- countRDD(x) maxSelected <- 0 MAXINT <- .Machine$integer.max @@ -964,7 +964,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", } set.seed(seed) - samples <- collect(sampleRDD(x, withReplacement, fraction, + samples <- collectRDD(sampleRDD(x, withReplacement, fraction, as.integer(ceiling(runif(1, -MAXINT, MAXINT))))) @@ -972,7 +972,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", # take samples; this shouldn't happen often because we use a big # multiplier for thei initial size while (length(samples) < total) - samples <- collect(sampleRDD(x, withReplacement, fraction, + samples <- collectRDD(sampleRDD(x, withReplacement, fraction, as.integer(ceiling(runif(1, -MAXINT, MAXINT))))) @@ -990,7 +990,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list(1, 2, 3)) -#' collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) +#' collectRDD(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) #'} # nolint end #' @rdname keyBy @@ -1019,12 +1019,12 @@ setMethod("keyBy", #' sc <- sparkR.init() #' rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L) #' getNumPartitions(rdd) # 4 -#' getNumPartitions(repartition(rdd, 2L)) # 2 +#' getNumPartitions(repartitionRDD(rdd, 2L)) # 2 #'} #' @rdname repartition #' @aliases repartition,RDD #' @noRd -setMethod("repartition", +setMethod("repartitionRDD", signature(x = "RDD"), function(x, numPartitions) { if (!is.null(numPartitions) && is.numeric(numPartitions)) { @@ -1064,7 +1064,7 @@ setMethod("coalesce", }) } shuffled <- lapplyPartitionsWithIndex(x, func) - repartitioned <- partitionBy(shuffled, numPartitions) + repartitioned <- partitionByRDD(shuffled, numPartitions) values(repartitioned) } else { jrdd <- callJMethod(getJRDD(x), "coalesce", numPartitions, shuffle) @@ -1135,7 +1135,7 @@ setMethod("saveAsTextFile", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list(3, 2, 1)) -#' collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3) +#' collectRDD(sortBy(rdd, function(x) { x })) # list (1, 2, 3) #'} # nolint end #' @rdname sortBy @@ -1304,7 +1304,7 @@ setMethod("aggregateRDD", Reduce(seqOp, part, zeroValue) } - partitionList <- collect(lapplyPartition(x, partitionFunc), + partitionList <- collectRDD(lapplyPartition(x, partitionFunc), flatten = FALSE) Reduce(combOp, partitionList, zeroValue) }) @@ -1322,7 +1322,7 @@ setMethod("aggregateRDD", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) -#' collect(pipeRDD(rdd, "more") +#' pipeRDD(rdd, "more") #' Output: c("1", "2", ..., "10") #'} #' @aliases pipeRDD,RDD,character-method @@ -1397,7 +1397,7 @@ setMethod("setName", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithUniqueId(rdd)) +#' collectRDD(zipWithUniqueId(rdd)) #' # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) #'} # nolint end @@ -1440,7 +1440,7 @@ setMethod("zipWithUniqueId", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithIndex(rdd)) +#' collectRDD(zipWithIndex(rdd)) #' # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) #'} # nolint end @@ -1452,7 +1452,7 @@ setMethod("zipWithIndex", function(x) { n <- getNumPartitions(x) if (n > 1) { - nums <- collect(lapplyPartition(x, + nums <- collectRDD(lapplyPartition(x, function(part) { list(length(part)) })) @@ -1488,7 +1488,7 @@ setMethod("zipWithIndex", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, as.list(1:4), 2L) -#' collect(glom(rdd)) +#' collectRDD(glom(rdd)) #' # list(list(1, 2), list(3, 4)) #'} # nolint end @@ -1556,7 +1556,7 @@ setMethod("unionRDD", #' sc <- sparkR.init() #' rdd1 <- parallelize(sc, 0:4) #' rdd2 <- parallelize(sc, 1000:1004) -#' collect(zipRDD(rdd1, rdd2)) +#' collectRDD(zipRDD(rdd1, rdd2)) #' # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004)) #'} # nolint end @@ -1628,7 +1628,7 @@ setMethod("cartesian", #' sc <- sparkR.init() #' rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4)) #' rdd2 <- parallelize(sc, list(2, 4)) -#' collect(subtract(rdd1, rdd2)) +#' collectRDD(subtract(rdd1, rdd2)) #' # list(1, 1, 3) #'} # nolint end @@ -1662,7 +1662,7 @@ setMethod("subtract", #' sc <- sparkR.init() #' rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) #' rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) -#' collect(sortBy(intersection(rdd1, rdd2), function(x) { x })) +#' collectRDD(sortBy(intersection(rdd1, rdd2), function(x) { x })) #' # list(1, 2, 3) #'} # nolint end @@ -1699,7 +1699,7 @@ setMethod("intersection", #' rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 #' rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 #' rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 -#' collect(zipPartitions(rdd1, rdd2, rdd3, +#' collectRDD(zipPartitions(rdd1, rdd2, rdd3, #' func = function(x, y, z) { list(list(x, y, z))} )) #' # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))) #'} diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index a14bcd91b3..0c06bba639 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -218,7 +218,7 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { } if (is.null(schema) || (!inherits(schema, "structType") && is.null(names(schema)))) { - row <- first(rdd) + row <- firstRDD(rdd) names <- if (is.null(schema)) { names(row) } else { diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 2538bb2507..13ade49eab 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -267,7 +267,7 @@ spark.lapply <- function(list, func) { sc <- getSparkContext() rdd <- parallelize(sc, list, length(list)) results <- map(rdd, func) - local <- collect(results) + local <- collectRDD(results) local } diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 10a09129ec..52ab730e21 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -23,9 +23,7 @@ setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") }) -# @rdname cache-methods -# @export -setGeneric("cache", function(x) { standardGeneric("cache") }) +setGeneric("cacheRDD", function(x) { standardGeneric("cacheRDD") }) # @rdname coalesce # @seealso repartition @@ -36,9 +34,7 @@ setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coales # @export setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") }) -# @rdname collect-methods -# @export -setGeneric("collect", function(x, ...) { standardGeneric("collect") }) +setGeneric("collectRDD", function(x, ...) { standardGeneric("collectRDD") }) # @rdname collect-methods # @export @@ -51,9 +47,9 @@ setGeneric("collectPartition", standardGeneric("collectPartition") }) -# @rdname nrow -# @export -setGeneric("count", function(x) { standardGeneric("count") }) +setGeneric("countRDD", function(x) { standardGeneric("countRDD") }) + +setGeneric("lengthRDD", function(x) { standardGeneric("lengthRDD") }) # @rdname countByValue # @export @@ -74,17 +70,13 @@ setGeneric("approxQuantile", standardGeneric("approxQuantile") }) -# @rdname distinct -# @export -setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") }) +setGeneric("distinctRDD", function(x, numPartitions = 1) { standardGeneric("distinctRDD") }) # @rdname filterRDD # @export setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") }) -# @rdname first -# @export -setGeneric("first", function(x, ...) { standardGeneric("first") }) +setGeneric("firstRDD", function(x, ...) { standardGeneric("firstRDD") }) # @rdname flatMap # @export @@ -110,6 +102,8 @@ setGeneric("glom", function(x) { standardGeneric("glom") }) # @export setGeneric("histogram", function(df, col, nbins=10) { standardGeneric("histogram") }) +setGeneric("joinRDD", function(x, y, ...) { standardGeneric("joinRDD") }) + # @rdname keyBy # @export setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") }) @@ -152,9 +146,7 @@ setGeneric("getNumPartitions", function(x) { standardGeneric("getNumPartitions") # @export setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") }) -# @rdname persist -# @export -setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) +setGeneric("persistRDD", function(x, newLevel) { standardGeneric("persistRDD") }) # @rdname pipeRDD # @export @@ -168,10 +160,7 @@ setGeneric("pivot", function(x, colname, values = list()) { standardGeneric("piv # @export setGeneric("reduce", function(x, func) { standardGeneric("reduce") }) -# @rdname repartition -# @seealso coalesce -# @export -setGeneric("repartition", function(x, ...) { standardGeneric("repartition") }) +setGeneric("repartitionRDD", function(x, ...) { standardGeneric("repartitionRDD") }) # @rdname sampleRDD # @export @@ -193,6 +182,8 @@ setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile # @export setGeneric("setName", function(x, name) { standardGeneric("setName") }) +setGeneric("showRDD", function(object, ...) { standardGeneric("showRDD") }) + # @rdname sortBy # @export setGeneric("sortBy", @@ -200,9 +191,7 @@ setGeneric("sortBy", standardGeneric("sortBy") }) -# @rdname take -# @export -setGeneric("take", function(x, num) { standardGeneric("take") }) +setGeneric("takeRDD", function(x, num) { standardGeneric("takeRDD") }) # @rdname takeOrdered # @export @@ -223,9 +212,7 @@ setGeneric("top", function(x, num) { standardGeneric("top") }) # @export setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") }) -# @rdname unpersist-methods -# @export -setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") }) +setGeneric("unpersistRDD", function(x, ...) { standardGeneric("unpersistRDD") }) # @rdname zipRDD # @export @@ -343,9 +330,7 @@ setGeneric("join", function(x, y, ...) { standardGeneric("join") }) # @export setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") }) -#' @rdname partitionBy -#' @export -setGeneric("partitionBy", function(x, ...) { standardGeneric("partitionBy") }) +setGeneric("partitionByRDD", function(x, ...) { standardGeneric("partitionByRDD") }) # @rdname reduceByKey # @seealso groupByKey @@ -414,6 +399,14 @@ setGeneric("as.data.frame", #' @export setGeneric("attach") +#' @rdname cache +#' @export +setGeneric("cache", function(x) { standardGeneric("cache") }) + +#' @rdname collect +#' @export +setGeneric("collect", function(x, ...) { standardGeneric("collect") }) + #' @rdname columns #' @export setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") }) @@ -434,6 +427,10 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") }) #' @export setGeneric("columns", function(x) {standardGeneric("columns") }) +#' @rdname nrow +#' @export +setGeneric("count", function(x) { standardGeneric("count") }) + #' @rdname cov #' @export setGeneric("cov", function(x, ...) {standardGeneric("cov") }) @@ -477,6 +474,10 @@ setGeneric("gapplyCollect", function(x, ...) { standardGeneric("gapplyCollect") #' @export setGeneric("describe", function(x, col, ...) { standardGeneric("describe") }) +#' @rdname distinct +#' @export +setGeneric("distinct", function(x) { standardGeneric("distinct") }) + #' @rdname drop #' @export setGeneric("drop", function(x, ...) { standardGeneric("drop") }) @@ -519,6 +520,10 @@ setGeneric("fillna", function(x, value, cols = NULL) { standardGeneric("fillna") #' @export setGeneric("filter", function(x, condition) { standardGeneric("filter") }) +#' @rdname first +#' @export +setGeneric("first", function(x, ...) { standardGeneric("first") }) + #' @rdname groupBy #' @export setGeneric("group_by", function(x, ...) { standardGeneric("group_by") }) @@ -555,17 +560,25 @@ setGeneric("mutate", function(.data, ...) {standardGeneric("mutate") }) #' @export setGeneric("orderBy", function(x, col, ...) { standardGeneric("orderBy") }) +#' @rdname persist +#' @export +setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) + #' @rdname printSchema #' @export setGeneric("printSchema", function(x) { standardGeneric("printSchema") }) +#' @rdname registerTempTable-deprecated +#' @export +setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") }) + #' @rdname rename #' @export setGeneric("rename", function(x, ...) { standardGeneric("rename") }) -#' @rdname registerTempTable-deprecated +#' @rdname repartition #' @export -setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") }) +setGeneric("repartition", function(x, ...) { standardGeneric("repartition") }) #' @rdname sample #' @export @@ -592,6 +605,10 @@ setGeneric("saveAsTable", function(df, tableName, source = NULL, mode = "error", #' @export setGeneric("str") +#' @rdname take +#' @export +setGeneric("take", function(x, num) { standardGeneric("take") }) + #' @rdname mutate #' @export setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") }) @@ -674,6 +691,10 @@ setGeneric("union", function(x, y) { standardGeneric("union") }) #' @export setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") }) +#' @rdname unpersist-methods +#' @export +setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") }) + #' @rdname filter #' @export setGeneric("where", function(x, condition) { standardGeneric("where") }) @@ -771,6 +792,10 @@ setGeneric("over", function(x, window) { standardGeneric("over") }) ###################### WindowSpec Methods ########################## +#' @rdname partitionBy +#' @export +setGeneric("partitionBy", function(x, ...) { standardGeneric("partitionBy") }) + #' @rdname rowsBetween #' @export setGeneric("rowsBetween", function(x, start, end) { standardGeneric("rowsBetween") }) diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index d39775cabe..f0605db1e9 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -49,7 +49,7 @@ setMethod("lookup", lapply(filtered, function(i) { i[[2]] }) } valsRDD <- lapplyPartition(x, partitionFunc) - collect(valsRDD) + collectRDD(valsRDD) }) #' Count the number of elements for each key, and return the result to the @@ -85,7 +85,7 @@ setMethod("countByKey", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) -#' collect(keys(rdd)) # list(1, 3) +#' collectRDD(keys(rdd)) # list(1, 3) #'} # nolint end #' @rdname keys @@ -108,7 +108,7 @@ setMethod("keys", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) -#' collect(values(rdd)) # list(2, 4) +#' collectRDD(values(rdd)) # list(2, 4) #'} # nolint end #' @rdname values @@ -135,7 +135,7 @@ setMethod("values", #' sc <- sparkR.init() #' rdd <- parallelize(sc, 1:10) #' makePairs <- lapply(rdd, function(x) { list(x, x) }) -#' collect(mapValues(makePairs, function(x) { x * 2) }) +#' collectRDD(mapValues(makePairs, function(x) { x * 2) }) #' Output: list(list(1,2), list(2,4), list(3,6), ...) #'} #' @rdname mapValues @@ -162,7 +162,7 @@ setMethod("mapValues", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4)))) -#' collect(flatMapValues(rdd, function(x) { x })) +#' collectRDD(flatMapValues(rdd, function(x) { x })) #' Output: list(list(1,1), list(1,2), list(2,3), list(2,4)) #'} #' @rdname flatMapValues @@ -198,13 +198,13 @@ setMethod("flatMapValues", #' sc <- sparkR.init() #' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) #' rdd <- parallelize(sc, pairs) -#' parts <- partitionBy(rdd, 2L) +#' parts <- partitionByRDD(rdd, 2L) #' collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4) #'} #' @rdname partitionBy #' @aliases partitionBy,RDD,integer-method #' @noRd -setMethod("partitionBy", +setMethod("partitionByRDD", signature(x = "RDD"), function(x, numPartitions, partitionFunc = hashCode) { stopifnot(is.numeric(numPartitions)) @@ -261,7 +261,7 @@ setMethod("partitionBy", #' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) #' rdd <- parallelize(sc, pairs) #' parts <- groupByKey(rdd, 2L) -#' grouped <- collect(parts) +#' grouped <- collectRDD(parts) #' grouped[[1]] # Should be a list(1, list(2, 4)) #'} #' @rdname groupByKey @@ -270,7 +270,7 @@ setMethod("partitionBy", setMethod("groupByKey", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions) { - shuffled <- partitionBy(x, numPartitions) + shuffled <- partitionByRDD(x, numPartitions) groupVals <- function(part) { vals <- new.env() keys <- new.env() @@ -321,7 +321,7 @@ setMethod("groupByKey", #' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) #' rdd <- parallelize(sc, pairs) #' parts <- reduceByKey(rdd, "+", 2L) -#' reduced <- collect(parts) +#' reduced <- collectRDD(parts) #' reduced[[1]] # Should be a list(1, 6) #'} #' @rdname reduceByKey @@ -342,7 +342,7 @@ setMethod("reduceByKey", convertEnvsToList(keys, vals) } locallyReduced <- lapplyPartition(x, reduceVals) - shuffled <- partitionBy(locallyReduced, numToInt(numPartitions)) + shuffled <- partitionByRDD(locallyReduced, numToInt(numPartitions)) lapplyPartition(shuffled, reduceVals) }) @@ -430,7 +430,7 @@ setMethod("reduceByKeyLocally", #' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) #' rdd <- parallelize(sc, pairs) #' parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L) -#' combined <- collect(parts) +#' combined <- collectRDD(parts) #' combined[[1]] # Should be a list(1, 6) #'} # nolint end @@ -453,7 +453,7 @@ setMethod("combineByKey", convertEnvsToList(keys, combiners) } locallyCombined <- lapplyPartition(x, combineLocally) - shuffled <- partitionBy(locallyCombined, numToInt(numPartitions)) + shuffled <- partitionByRDD(locallyCombined, numToInt(numPartitions)) mergeAfterShuffle <- function(part) { combiners <- new.env() keys <- new.env() @@ -563,13 +563,13 @@ setMethod("foldByKey", #' sc <- sparkR.init() #' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) #' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' join(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)) +#' joinRDD(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)) #'} # nolint end #' @rdname join-methods #' @aliases join,RDD,RDD-method #' @noRd -setMethod("join", +setMethod("joinRDD", signature(x = "RDD", y = "RDD"), function(x, y, numPartitions) { xTagged <- lapply(x, function(i) { list(i[[1]], list(1L, i[[2]])) }) @@ -772,7 +772,7 @@ setMethod("cogroup", #'\dontrun{ #' sc <- sparkR.init() #' rdd <- parallelize(sc, list(list(3, 1), list(2, 2), list(1, 3))) -#' collect(sortByKey(rdd)) # list (list(1, 3), list(2, 2), list(3, 1)) +#' collectRDD(sortByKey(rdd)) # list (list(1, 3), list(2, 2), list(3, 1)) #'} # nolint end #' @rdname sortByKey @@ -784,12 +784,12 @@ setMethod("sortByKey", rangeBounds <- list() if (numPartitions > 1) { - rddSize <- count(x) + rddSize <- countRDD(x) # constant from Spark's RangePartitioner maxSampleSize <- numPartitions * 20 fraction <- min(maxSampleSize / max(rddSize, 1), 1.0) - samples <- collect(keys(sampleRDD(x, FALSE, fraction, 1L))) + samples <- collectRDD(keys(sampleRDD(x, FALSE, fraction, 1L))) # Note: the built-in R sort() function only works on atomic vectors samples <- sort(unlist(samples, recursive = FALSE), decreasing = !ascending) @@ -822,7 +822,7 @@ setMethod("sortByKey", sortKeyValueList(part, decreasing = !ascending) } - newRDD <- partitionBy(x, numPartitions, rangePartitionFunc) + newRDD <- partitionByRDD(x, numPartitions, rangePartitionFunc) lapplyPartition(newRDD, partitionFunc) }) @@ -841,7 +841,7 @@ setMethod("sortByKey", #' rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4), #' list("b", 5), list("a", 2))) #' rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1))) -#' collect(subtractByKey(rdd1, rdd2)) +#' collectRDD(subtractByKey(rdd1, rdd2)) #' # list(list("b", 4), list("b", 5)) #'} # nolint end diff --git a/R/pkg/inst/tests/testthat/test_binaryFile.R b/R/pkg/inst/tests/testthat/test_binaryFile.R index 56ac8eb728..b5c279e315 100644 --- a/R/pkg/inst/tests/testthat/test_binaryFile.R +++ b/R/pkg/inst/tests/testthat/test_binaryFile.R @@ -31,7 +31,7 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", { rdd <- textFile(sc, fileName1, 1) saveAsObjectFile(rdd, fileName2) rdd <- objectFile(sc, fileName2) - expect_equal(collect(rdd), as.list(mockFile)) + expect_equal(collectRDD(rdd), as.list(mockFile)) unlink(fileName1) unlink(fileName2, recursive = TRUE) @@ -44,7 +44,7 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", { rdd <- parallelize(sc, l, 1) saveAsObjectFile(rdd, fileName) rdd <- objectFile(sc, fileName) - expect_equal(collect(rdd), l) + expect_equal(collectRDD(rdd), l) unlink(fileName, recursive = TRUE) }) @@ -64,7 +64,7 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works", saveAsObjectFile(counts, fileName2) counts <- objectFile(sc, fileName2) - output <- collect(counts) + output <- collectRDD(counts) expected <- list(list("awesome.", 1), list("Spark", 2), list("pretty.", 1), list("is", 2)) expect_equal(sortKeyValueList(output), sortKeyValueList(expected)) @@ -83,7 +83,7 @@ test_that("saveAsObjectFile()/objectFile() works with multiple paths", { saveAsObjectFile(rdd2, fileName2) rdd <- objectFile(sc, c(fileName1, fileName2)) - expect_equal(count(rdd), 2) + expect_equal(countRDD(rdd), 2) unlink(fileName1, recursive = TRUE) unlink(fileName2, recursive = TRUE) diff --git a/R/pkg/inst/tests/testthat/test_binary_function.R b/R/pkg/inst/tests/testthat/test_binary_function.R index ae7abe20cc..59cb2e6204 100644 --- a/R/pkg/inst/tests/testthat/test_binary_function.R +++ b/R/pkg/inst/tests/testthat/test_binary_function.R @@ -29,7 +29,7 @@ rdd <- parallelize(sc, nums, 2L) mockFile <- c("Spark is pretty.", "Spark is awesome.") test_that("union on two RDDs", { - actual <- collect(unionRDD(rdd, rdd)) + actual <- collectRDD(unionRDD(rdd, rdd)) expect_equal(actual, as.list(rep(nums, 2))) fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") @@ -37,13 +37,13 @@ test_that("union on two RDDs", { text.rdd <- textFile(sc, fileName) union.rdd <- unionRDD(rdd, text.rdd) - actual <- collect(union.rdd) + actual <- collectRDD(union.rdd) expect_equal(actual, c(as.list(nums), mockFile)) expect_equal(getSerializedMode(union.rdd), "byte") rdd <- map(text.rdd, function(x) {x}) union.rdd <- unionRDD(rdd, text.rdd) - actual <- collect(union.rdd) + actual <- collectRDD(union.rdd) expect_equal(actual, as.list(c(mockFile, mockFile))) expect_equal(getSerializedMode(union.rdd), "byte") @@ -54,14 +54,14 @@ test_that("cogroup on two RDDs", { rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L) - actual <- collect(cogroup.rdd) + actual <- collectRDD(cogroup.rdd) expect_equal(actual, list(list(1, list(list(1), list(2, 3))), list(2, list(list(4), list())))) rdd1 <- parallelize(sc, list(list("a", 1), list("a", 4))) rdd2 <- parallelize(sc, list(list("b", 2), list("a", 3))) cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L) - actual <- collect(cogroup.rdd) + actual <- collectRDD(cogroup.rdd) expected <- list(list("b", list(list(), list(2))), list("a", list(list(1, 4), list(3)))) expect_equal(sortKeyValueList(actual), @@ -72,7 +72,7 @@ test_that("zipPartitions() on RDDs", { rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 - actual <- collect(zipPartitions(rdd1, rdd2, rdd3, + actual <- collectRDD(zipPartitions(rdd1, rdd2, rdd3, func = function(x, y, z) { list(list(x, y, z))} )) expect_equal(actual, list(list(1, c(1, 2), c(1, 2, 3)), list(2, c(3, 4), c(4, 5, 6)))) @@ -82,19 +82,19 @@ test_that("zipPartitions() on RDDs", { writeLines(mockFile, fileName) rdd <- textFile(sc, fileName, 1) - actual <- collect(zipPartitions(rdd, rdd, + actual <- collectRDD(zipPartitions(rdd, rdd, func = function(x, y) { list(paste(x, y, sep = "\n")) })) expected <- list(paste(mockFile, mockFile, sep = "\n")) expect_equal(actual, expected) rdd1 <- parallelize(sc, 0:1, 1) - actual <- collect(zipPartitions(rdd1, rdd, + actual <- collectRDD(zipPartitions(rdd1, rdd, func = function(x, y) { list(x + nchar(y)) })) expected <- list(0:1 + nchar(mockFile)) expect_equal(actual, expected) rdd <- map(rdd, function(x) { x }) - actual <- collect(zipPartitions(rdd, rdd1, + actual <- collectRDD(zipPartitions(rdd, rdd1, func = function(x, y) { list(y + nchar(x)) })) expect_equal(actual, expected) diff --git a/R/pkg/inst/tests/testthat/test_broadcast.R b/R/pkg/inst/tests/testthat/test_broadcast.R index c7fefb5cf9..65f204d096 100644 --- a/R/pkg/inst/tests/testthat/test_broadcast.R +++ b/R/pkg/inst/tests/testthat/test_broadcast.R @@ -32,7 +32,7 @@ test_that("using broadcast variable", { useBroadcast <- function(x) { sum(SparkR:::value(randomMatBr) * x) } - actual <- collect(lapply(rrdd, useBroadcast)) + actual <- collectRDD(lapply(rrdd, useBroadcast)) expected <- list(sum(randomMat) * 1, sum(randomMat) * 2) expect_equal(actual, expected) }) @@ -43,7 +43,7 @@ test_that("without using broadcast variable", { useBroadcast <- function(x) { sum(randomMat * x) } - actual <- collect(lapply(rrdd, useBroadcast)) + actual <- collectRDD(lapply(rrdd, useBroadcast)) expected <- list(sum(randomMat) * 1, sum(randomMat) * 2) expect_equal(actual, expected) }) diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R index 8bd134a58d..1ab7f319df 100644 --- a/R/pkg/inst/tests/testthat/test_context.R +++ b/R/pkg/inst/tests/testthat/test_context.R @@ -58,7 +58,7 @@ test_that("repeatedly starting and stopping SparkR", { for (i in 1:4) { sc <- suppressWarnings(sparkR.init()) rdd <- parallelize(sc, 1:20, 2L) - expect_equal(count(rdd), 20) + expect_equal(countRDD(rdd), 20) suppressWarnings(sparkR.stop()) } }) @@ -90,8 +90,8 @@ test_that("rdd GC across sparkR.stop", { rm(rdd2) gc() - count(rdd3) - count(rdd4) + countRDD(rdd3) + countRDD(rdd4) sparkR.session.stop() }) diff --git a/R/pkg/inst/tests/testthat/test_includePackage.R b/R/pkg/inst/tests/testthat/test_includePackage.R index ca2b900572..563ea298c2 100644 --- a/R/pkg/inst/tests/testthat/test_includePackage.R +++ b/R/pkg/inst/tests/testthat/test_includePackage.R @@ -37,7 +37,7 @@ test_that("include inside function", { } data <- lapplyPartition(rdd, generateData) - actual <- collect(data) + actual <- collectRDD(data) } }) @@ -53,7 +53,7 @@ test_that("use include package", { includePackage(sc, plyr) data <- lapplyPartition(rdd, generateData) - actual <- collect(data) + actual <- collectRDD(data) } }) diff --git a/R/pkg/inst/tests/testthat/test_parallelize_collect.R b/R/pkg/inst/tests/testthat/test_parallelize_collect.R index 959d7ab9e6..55972e1ba4 100644 --- a/R/pkg/inst/tests/testthat/test_parallelize_collect.R +++ b/R/pkg/inst/tests/testthat/test_parallelize_collect.R @@ -67,22 +67,22 @@ test_that("parallelize() on simple vectors and lists returns an RDD", { test_that("collect(), following a parallelize(), gives back the original collections", { numVectorRDD <- parallelize(jsc, numVector, 10) - expect_equal(collect(numVectorRDD), as.list(numVector)) + expect_equal(collectRDD(numVectorRDD), as.list(numVector)) numListRDD <- parallelize(jsc, numList, 1) numListRDD2 <- parallelize(jsc, numList, 4) - expect_equal(collect(numListRDD), as.list(numList)) - expect_equal(collect(numListRDD2), as.list(numList)) + expect_equal(collectRDD(numListRDD), as.list(numList)) + expect_equal(collectRDD(numListRDD2), as.list(numList)) strVectorRDD <- parallelize(jsc, strVector, 2) strVectorRDD2 <- parallelize(jsc, strVector, 3) - expect_equal(collect(strVectorRDD), as.list(strVector)) - expect_equal(collect(strVectorRDD2), as.list(strVector)) + expect_equal(collectRDD(strVectorRDD), as.list(strVector)) + expect_equal(collectRDD(strVectorRDD2), as.list(strVector)) strListRDD <- parallelize(jsc, strList, 4) strListRDD2 <- parallelize(jsc, strList, 1) - expect_equal(collect(strListRDD), as.list(strList)) - expect_equal(collect(strListRDD2), as.list(strList)) + expect_equal(collectRDD(strListRDD), as.list(strList)) + expect_equal(collectRDD(strListRDD2), as.list(strList)) }) test_that("regression: collect() following a parallelize() does not drop elements", { @@ -90,7 +90,7 @@ test_that("regression: collect() following a parallelize() does not drop element collLen <- 10 numPart <- 6 expected <- runif(collLen) - actual <- collect(parallelize(jsc, expected, numPart)) + actual <- collectRDD(parallelize(jsc, expected, numPart)) expect_equal(actual, as.list(expected)) }) @@ -99,14 +99,14 @@ test_that("parallelize() and collect() work for lists of pairs (pairwise data)", numPairsRDDD1 <- parallelize(jsc, numPairs, 1) numPairsRDDD2 <- parallelize(jsc, numPairs, 2) numPairsRDDD3 <- parallelize(jsc, numPairs, 3) - expect_equal(collect(numPairsRDDD1), numPairs) - expect_equal(collect(numPairsRDDD2), numPairs) - expect_equal(collect(numPairsRDDD3), numPairs) + expect_equal(collectRDD(numPairsRDDD1), numPairs) + expect_equal(collectRDD(numPairsRDDD2), numPairs) + expect_equal(collectRDD(numPairsRDDD3), numPairs) # can also leave out the parameter name, if the params are supplied in order strPairsRDDD1 <- parallelize(jsc, strPairs, 1) strPairsRDDD2 <- parallelize(jsc, strPairs, 2) - expect_equal(collect(strPairsRDDD1), strPairs) - expect_equal(collect(strPairsRDDD2), strPairs) + expect_equal(collectRDD(strPairsRDDD1), strPairs) + expect_equal(collectRDD(strPairsRDDD2), strPairs) }) sparkR.session.stop() diff --git a/R/pkg/inst/tests/testthat/test_rdd.R b/R/pkg/inst/tests/testthat/test_rdd.R index 508a3a7dfd..a3d66c245a 100644 --- a/R/pkg/inst/tests/testthat/test_rdd.R +++ b/R/pkg/inst/tests/testthat/test_rdd.R @@ -34,14 +34,14 @@ test_that("get number of partitions in RDD", { }) test_that("first on RDD", { - expect_equal(first(rdd), 1) + expect_equal(firstRDD(rdd), 1) newrdd <- lapply(rdd, function(x) x + 1) - expect_equal(first(newrdd), 2) + expect_equal(firstRDD(newrdd), 2) }) test_that("count and length on RDD", { - expect_equal(count(rdd), 10) - expect_equal(length(rdd), 10) + expect_equal(countRDD(rdd), 10) + expect_equal(lengthRDD(rdd), 10) }) test_that("count by values and keys", { @@ -57,40 +57,40 @@ test_that("count by values and keys", { test_that("lapply on RDD", { multiples <- lapply(rdd, function(x) { 2 * x }) - actual <- collect(multiples) + actual <- collectRDD(multiples) expect_equal(actual, as.list(nums * 2)) }) test_that("lapplyPartition on RDD", { sums <- lapplyPartition(rdd, function(part) { sum(unlist(part)) }) - actual <- collect(sums) + actual <- collectRDD(sums) expect_equal(actual, list(15, 40)) }) test_that("mapPartitions on RDD", { sums <- mapPartitions(rdd, function(part) { sum(unlist(part)) }) - actual <- collect(sums) + actual <- collectRDD(sums) expect_equal(actual, list(15, 40)) }) test_that("flatMap() on RDDs", { flat <- flatMap(intRdd, function(x) { list(x, x) }) - actual <- collect(flat) + actual <- collectRDD(flat) expect_equal(actual, rep(intPairs, each = 2)) }) test_that("filterRDD on RDD", { filtered.rdd <- filterRDD(rdd, function(x) { x %% 2 == 0 }) - actual <- collect(filtered.rdd) + actual <- collectRDD(filtered.rdd) expect_equal(actual, list(2, 4, 6, 8, 10)) filtered.rdd <- Filter(function(x) { x[[2]] < 0 }, intRdd) - actual <- collect(filtered.rdd) + actual <- collectRDD(filtered.rdd) expect_equal(actual, list(list(1L, -1))) # Filter out all elements. filtered.rdd <- filterRDD(rdd, function(x) { x > 10 }) - actual <- collect(filtered.rdd) + actual <- collectRDD(filtered.rdd) expect_equal(actual, list()) }) @@ -110,7 +110,7 @@ test_that("several transformations on RDD (a benchmark on PipelinedRDD)", { part <- as.list(unlist(part) * partIndex + i) }) rdd2 <- lapply(rdd2, function(x) x + x) - actual <- collect(rdd2) + actual <- collectRDD(rdd2) expected <- list(24, 24, 24, 24, 24, 168, 170, 172, 174, 176) expect_equal(actual, expected) @@ -126,20 +126,20 @@ test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkp part <- as.list(unlist(part) * partIndex) }) - cache(rdd2) + cacheRDD(rdd2) expect_true(rdd2@env$isCached) rdd2 <- lapply(rdd2, function(x) x) expect_false(rdd2@env$isCached) - unpersist(rdd2) + unpersistRDD(rdd2) expect_false(rdd2@env$isCached) - persist(rdd2, "MEMORY_AND_DISK") + persistRDD(rdd2, "MEMORY_AND_DISK") expect_true(rdd2@env$isCached) rdd2 <- lapply(rdd2, function(x) x) expect_false(rdd2@env$isCached) - unpersist(rdd2) + unpersistRDD(rdd2) expect_false(rdd2@env$isCached) tempDir <- tempfile(pattern = "checkpoint") @@ -152,7 +152,7 @@ test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkp expect_false(rdd2@env$isCheckpointed) # make sure the data is collectable - collect(rdd2) + collectRDD(rdd2) unlink(tempDir) }) @@ -169,21 +169,21 @@ test_that("reduce on RDD", { test_that("lapply with dependency", { fa <- 5 multiples <- lapply(rdd, function(x) { fa * x }) - actual <- collect(multiples) + actual <- collectRDD(multiples) expect_equal(actual, as.list(nums * 5)) }) test_that("lapplyPartitionsWithIndex on RDDs", { func <- function(partIndex, part) { list(partIndex, Reduce("+", part)) } - actual <- collect(lapplyPartitionsWithIndex(rdd, func), flatten = FALSE) + actual <- collectRDD(lapplyPartitionsWithIndex(rdd, func), flatten = FALSE) expect_equal(actual, list(list(0, 15), list(1, 40))) pairsRDD <- parallelize(sc, list(list(1, 2), list(3, 4), list(4, 8)), 1L) partitionByParity <- function(key) { if (key %% 2 == 1) 0 else 1 } mkTup <- function(partIndex, part) { list(partIndex, part) } - actual <- collect(lapplyPartitionsWithIndex( - partitionBy(pairsRDD, 2L, partitionByParity), + actual <- collectRDD(lapplyPartitionsWithIndex( + partitionByRDD(pairsRDD, 2L, partitionByParity), mkTup), FALSE) expect_equal(actual, list(list(0, list(list(1, 2), list(3, 4))), @@ -191,7 +191,7 @@ test_that("lapplyPartitionsWithIndex on RDDs", { }) test_that("sampleRDD() on RDDs", { - expect_equal(unlist(collect(sampleRDD(rdd, FALSE, 1.0, 2014L))), nums) + expect_equal(unlist(collectRDD(sampleRDD(rdd, FALSE, 1.0, 2014L))), nums) }) test_that("takeSample() on RDDs", { @@ -238,7 +238,7 @@ test_that("takeSample() on RDDs", { test_that("mapValues() on pairwise RDDs", { multiples <- mapValues(intRdd, function(x) { x * 2 }) - actual <- collect(multiples) + actual <- collectRDD(multiples) expected <- lapply(intPairs, function(x) { list(x[[1]], x[[2]] * 2) }) @@ -247,11 +247,11 @@ test_that("mapValues() on pairwise RDDs", { test_that("flatMapValues() on pairwise RDDs", { l <- parallelize(sc, list(list(1, c(1, 2)), list(2, c(3, 4)))) - actual <- collect(flatMapValues(l, function(x) { x })) + actual <- collectRDD(flatMapValues(l, function(x) { x })) expect_equal(actual, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) # Generate x to x+1 for every value - actual <- collect(flatMapValues(intRdd, function(x) { x: (x + 1) })) + actual <- collectRDD(flatMapValues(intRdd, function(x) { x: (x + 1) })) expect_equal(actual, list(list(1L, -1), list(1L, 0), list(2L, 100), list(2L, 101), list(2L, 1), list(2L, 2), list(1L, 200), list(1L, 201))) @@ -273,8 +273,8 @@ test_that("reduceByKeyLocally() on PairwiseRDDs", { test_that("distinct() on RDDs", { nums.rep2 <- rep(1:10, 2) rdd.rep2 <- parallelize(sc, nums.rep2, 2L) - uniques <- distinct(rdd.rep2) - actual <- sort(unlist(collect(uniques))) + uniques <- distinctRDD(rdd.rep2) + actual <- sort(unlist(collectRDD(uniques))) expect_equal(actual, nums) }) @@ -296,7 +296,7 @@ test_that("sumRDD() on RDDs", { test_that("keyBy on RDDs", { func <- function(x) { x * x } keys <- keyBy(rdd, func) - actual <- collect(keys) + actual <- collectRDD(keys) expect_equal(actual, lapply(nums, function(x) { list(func(x), x) })) }) @@ -304,12 +304,12 @@ test_that("repartition/coalesce on RDDs", { rdd <- parallelize(sc, 1:20, 4L) # each partition contains 5 elements # repartition - r1 <- repartition(rdd, 2) + r1 <- repartitionRDD(rdd, 2) expect_equal(getNumPartitions(r1), 2L) count <- length(collectPartition(r1, 0L)) expect_true(count >= 8 && count <= 12) - r2 <- repartition(rdd, 6) + r2 <- repartitionRDD(rdd, 6) expect_equal(getNumPartitions(r2), 6L) count <- length(collectPartition(r2, 0L)) expect_true(count >= 0 && count <= 4) @@ -323,12 +323,12 @@ test_that("repartition/coalesce on RDDs", { test_that("sortBy() on RDDs", { sortedRdd <- sortBy(rdd, function(x) { x * x }, ascending = FALSE) - actual <- collect(sortedRdd) + actual <- collectRDD(sortedRdd) expect_equal(actual, as.list(sort(nums, decreasing = TRUE))) rdd2 <- parallelize(sc, sort(nums, decreasing = TRUE), 2L) sortedRdd2 <- sortBy(rdd2, function(x) { x * x }) - actual <- collect(sortedRdd2) + actual <- collectRDD(sortedRdd2) expect_equal(actual, as.list(nums)) }) @@ -380,13 +380,13 @@ test_that("aggregateRDD() on RDDs", { test_that("zipWithUniqueId() on RDDs", { rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) - actual <- collect(zipWithUniqueId(rdd)) + actual <- collectRDD(zipWithUniqueId(rdd)) expected <- list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) expect_equal(actual, expected) rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 1L) - actual <- collect(zipWithUniqueId(rdd)) + actual <- collectRDD(zipWithUniqueId(rdd)) expected <- list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) expect_equal(actual, expected) @@ -394,13 +394,13 @@ test_that("zipWithUniqueId() on RDDs", { test_that("zipWithIndex() on RDDs", { rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) - actual <- collect(zipWithIndex(rdd)) + actual <- collectRDD(zipWithIndex(rdd)) expected <- list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) expect_equal(actual, expected) rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 1L) - actual <- collect(zipWithIndex(rdd)) + actual <- collectRDD(zipWithIndex(rdd)) expected <- list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) expect_equal(actual, expected) @@ -408,35 +408,35 @@ test_that("zipWithIndex() on RDDs", { test_that("glom() on RDD", { rdd <- parallelize(sc, as.list(1:4), 2L) - actual <- collect(glom(rdd)) + actual <- collectRDD(glom(rdd)) expect_equal(actual, list(list(1, 2), list(3, 4))) }) test_that("keys() on RDDs", { keys <- keys(intRdd) - actual <- collect(keys) + actual <- collectRDD(keys) expect_equal(actual, lapply(intPairs, function(x) { x[[1]] })) }) test_that("values() on RDDs", { values <- values(intRdd) - actual <- collect(values) + actual <- collectRDD(values) expect_equal(actual, lapply(intPairs, function(x) { x[[2]] })) }) test_that("pipeRDD() on RDDs", { - actual <- collect(pipeRDD(rdd, "more")) + actual <- collectRDD(pipeRDD(rdd, "more")) expected <- as.list(as.character(1:10)) expect_equal(actual, expected) trailed.rdd <- parallelize(sc, c("1", "", "2\n", "3\n\r\n")) - actual <- collect(pipeRDD(trailed.rdd, "sort")) + actual <- collectRDD(pipeRDD(trailed.rdd, "sort")) expected <- list("", "1", "2", "3") expect_equal(actual, expected) rev.nums <- 9:0 rev.rdd <- parallelize(sc, rev.nums, 2L) - actual <- collect(pipeRDD(rev.rdd, "sort")) + actual <- collectRDD(pipeRDD(rev.rdd, "sort")) expected <- as.list(as.character(c(5:9, 0:4))) expect_equal(actual, expected) }) @@ -444,7 +444,7 @@ test_that("pipeRDD() on RDDs", { test_that("zipRDD() on RDDs", { rdd1 <- parallelize(sc, 0:4, 2) rdd2 <- parallelize(sc, 1000:1004, 2) - actual <- collect(zipRDD(rdd1, rdd2)) + actual <- collectRDD(zipRDD(rdd1, rdd2)) expect_equal(actual, list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004))) @@ -453,17 +453,17 @@ test_that("zipRDD() on RDDs", { writeLines(mockFile, fileName) rdd <- textFile(sc, fileName, 1) - actual <- collect(zipRDD(rdd, rdd)) + actual <- collectRDD(zipRDD(rdd, rdd)) expected <- lapply(mockFile, function(x) { list(x, x) }) expect_equal(actual, expected) rdd1 <- parallelize(sc, 0:1, 1) - actual <- collect(zipRDD(rdd1, rdd)) + actual <- collectRDD(zipRDD(rdd1, rdd)) expected <- lapply(0:1, function(x) { list(x, mockFile[x + 1]) }) expect_equal(actual, expected) rdd1 <- map(rdd, function(x) { x }) - actual <- collect(zipRDD(rdd, rdd1)) + actual <- collectRDD(zipRDD(rdd, rdd1)) expected <- lapply(mockFile, function(x) { list(x, x) }) expect_equal(actual, expected) @@ -472,7 +472,7 @@ test_that("zipRDD() on RDDs", { test_that("cartesian() on RDDs", { rdd <- parallelize(sc, 1:3) - actual <- collect(cartesian(rdd, rdd)) + actual <- collectRDD(cartesian(rdd, rdd)) expect_equal(sortKeyValueList(actual), list( list(1, 1), list(1, 2), list(1, 3), @@ -481,7 +481,7 @@ test_that("cartesian() on RDDs", { # test case where one RDD is empty emptyRdd <- parallelize(sc, list()) - actual <- collect(cartesian(rdd, emptyRdd)) + actual <- collectRDD(cartesian(rdd, emptyRdd)) expect_equal(actual, list()) mockFile <- c("Spark is pretty.", "Spark is awesome.") @@ -489,7 +489,7 @@ test_that("cartesian() on RDDs", { writeLines(mockFile, fileName) rdd <- textFile(sc, fileName) - actual <- collect(cartesian(rdd, rdd)) + actual <- collectRDD(cartesian(rdd, rdd)) expected <- list( list("Spark is awesome.", "Spark is pretty."), list("Spark is awesome.", "Spark is awesome."), @@ -498,7 +498,7 @@ test_that("cartesian() on RDDs", { expect_equal(sortKeyValueList(actual), expected) rdd1 <- parallelize(sc, 0:1) - actual <- collect(cartesian(rdd1, rdd)) + actual <- collectRDD(cartesian(rdd1, rdd)) expect_equal(sortKeyValueList(actual), list( list(0, "Spark is pretty."), @@ -507,7 +507,7 @@ test_that("cartesian() on RDDs", { list(1, "Spark is awesome."))) rdd1 <- map(rdd, function(x) { x }) - actual <- collect(cartesian(rdd, rdd1)) + actual <- collectRDD(cartesian(rdd, rdd1)) expect_equal(sortKeyValueList(actual), expected) unlink(fileName) @@ -518,24 +518,24 @@ test_that("subtract() on RDDs", { rdd1 <- parallelize(sc, l) # subtract by itself - actual <- collect(subtract(rdd1, rdd1)) + actual <- collectRDD(subtract(rdd1, rdd1)) expect_equal(actual, list()) # subtract by an empty RDD rdd2 <- parallelize(sc, list()) - actual <- collect(subtract(rdd1, rdd2)) + actual <- collectRDD(subtract(rdd1, rdd2)) expect_equal(as.list(sort(as.vector(actual, mode = "integer"))), l) rdd2 <- parallelize(sc, list(2, 4)) - actual <- collect(subtract(rdd1, rdd2)) + actual <- collectRDD(subtract(rdd1, rdd2)) expect_equal(as.list(sort(as.vector(actual, mode = "integer"))), list(1, 1, 3)) l <- list("a", "a", "b", "b", "c", "d") rdd1 <- parallelize(sc, l) rdd2 <- parallelize(sc, list("b", "d")) - actual <- collect(subtract(rdd1, rdd2)) + actual <- collectRDD(subtract(rdd1, rdd2)) expect_equal(as.list(sort(as.vector(actual, mode = "character"))), list("a", "a", "c")) }) @@ -546,17 +546,17 @@ test_that("subtractByKey() on pairwise RDDs", { rdd1 <- parallelize(sc, l) # subtractByKey by itself - actual <- collect(subtractByKey(rdd1, rdd1)) + actual <- collectRDD(subtractByKey(rdd1, rdd1)) expect_equal(actual, list()) # subtractByKey by an empty RDD rdd2 <- parallelize(sc, list()) - actual <- collect(subtractByKey(rdd1, rdd2)) + actual <- collectRDD(subtractByKey(rdd1, rdd2)) expect_equal(sortKeyValueList(actual), sortKeyValueList(l)) rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1))) - actual <- collect(subtractByKey(rdd1, rdd2)) + actual <- collectRDD(subtractByKey(rdd1, rdd2)) expect_equal(actual, list(list("b", 4), list("b", 5))) @@ -564,76 +564,76 @@ test_that("subtractByKey() on pairwise RDDs", { list(2, 5), list(1, 2)) rdd1 <- parallelize(sc, l) rdd2 <- parallelize(sc, list(list(1, 3), list(3, 1))) - actual <- collect(subtractByKey(rdd1, rdd2)) + actual <- collectRDD(subtractByKey(rdd1, rdd2)) expect_equal(actual, list(list(2, 4), list(2, 5))) }) test_that("intersection() on RDDs", { # intersection with self - actual <- collect(intersection(rdd, rdd)) + actual <- collectRDD(intersection(rdd, rdd)) expect_equal(sort(as.integer(actual)), nums) # intersection with an empty RDD emptyRdd <- parallelize(sc, list()) - actual <- collect(intersection(rdd, emptyRdd)) + actual <- collectRDD(intersection(rdd, emptyRdd)) expect_equal(actual, list()) rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) - actual <- collect(intersection(rdd1, rdd2)) + actual <- collectRDD(intersection(rdd1, rdd2)) expect_equal(sort(as.integer(actual)), 1:3) }) test_that("join() on pairwise RDDs", { rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) - actual <- collect(join(rdd1, rdd2, 2L)) + actual <- collectRDD(joinRDD(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list(1, list(1, 2)), list(1, list(1, 3))))) rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4))) rdd2 <- parallelize(sc, list(list("a", 2), list("a", 3))) - actual <- collect(join(rdd1, rdd2, 2L)) + actual <- collectRDD(joinRDD(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list("a", list(1, 2)), list("a", list(1, 3))))) rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) - actual <- collect(join(rdd1, rdd2, 2L)) + actual <- collectRDD(joinRDD(rdd1, rdd2, 2L)) expect_equal(actual, list()) rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) - actual <- collect(join(rdd1, rdd2, 2L)) + actual <- collectRDD(joinRDD(rdd1, rdd2, 2L)) expect_equal(actual, list()) }) test_that("leftOuterJoin() on pairwise RDDs", { rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) - actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4))) rdd2 <- parallelize(sc, list(list("a", 2), list("a", 3))) - actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(4, NULL)), list("a", list(1, 2)), list("a", list(1, 3))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) - actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(1, NULL)), list(2, list(2, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) - actual <- collect(leftOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(leftOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(2, NULL)), list("a", list(1, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -642,26 +642,26 @@ test_that("leftOuterJoin() on pairwise RDDs", { test_that("rightOuterJoin() on pairwise RDDs", { rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3))) rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) - actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(rightOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) rdd1 <- parallelize(sc, list(list("a", 2), list("a", 3))) rdd2 <- parallelize(sc, list(list("a", 1), list("b", 4))) - actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(rightOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(NULL, 4)), list("a", list(2, 1)), list("a", list(3, 1))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) - actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(rightOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list(3, list(NULL, 3)), list(4, list(NULL, 4))))) rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) - actual <- collect(rightOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(rightOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list("d", list(NULL, 4)), list("c", list(NULL, 3))))) }) @@ -669,14 +669,14 @@ test_that("rightOuterJoin() on pairwise RDDs", { test_that("fullOuterJoin() on pairwise RDDs", { rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3))) rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) - actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(fullOuterJoin(rdd1, rdd2, 2L)) expected <- list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4)), list(3, list(3, NULL))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) rdd1 <- parallelize(sc, list(list("a", 2), list("a", 3), list("c", 1))) rdd2 <- parallelize(sc, list(list("a", 1), list("b", 4))) - actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(fullOuterJoin(rdd1, rdd2, 2L)) expected <- list(list("b", list(NULL, 4)), list("a", list(2, 1)), list("a", list(3, 1)), list("c", list(1, NULL))) expect_equal(sortKeyValueList(actual), @@ -684,14 +684,14 @@ test_that("fullOuterJoin() on pairwise RDDs", { rdd1 <- parallelize(sc, list(list(1, 1), list(2, 2))) rdd2 <- parallelize(sc, list(list(3, 3), list(4, 4))) - actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(fullOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list(1, list(1, NULL)), list(2, list(2, NULL)), list(3, list(NULL, 3)), list(4, list(NULL, 4))))) rdd1 <- parallelize(sc, list(list("a", 1), list("b", 2))) rdd2 <- parallelize(sc, list(list("c", 3), list("d", 4))) - actual <- collect(fullOuterJoin(rdd1, rdd2, 2L)) + actual <- collectRDD(fullOuterJoin(rdd1, rdd2, 2L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(list(list("a", list(1, NULL)), list("b", list(2, NULL)), list("d", list(NULL, 4)), list("c", list(NULL, 3))))) @@ -700,21 +700,21 @@ test_that("fullOuterJoin() on pairwise RDDs", { test_that("sortByKey() on pairwise RDDs", { numPairsRdd <- map(rdd, function(x) { list (x, x) }) sortedRdd <- sortByKey(numPairsRdd, ascending = FALSE) - actual <- collect(sortedRdd) + actual <- collectRDD(sortedRdd) numPairs <- lapply(nums, function(x) { list (x, x) }) expect_equal(actual, sortKeyValueList(numPairs, decreasing = TRUE)) rdd2 <- parallelize(sc, sort(nums, decreasing = TRUE), 2L) numPairsRdd2 <- map(rdd2, function(x) { list (x, x) }) sortedRdd2 <- sortByKey(numPairsRdd2) - actual <- collect(sortedRdd2) + actual <- collectRDD(sortedRdd2) expect_equal(actual, numPairs) # sort by string keys l <- list(list("a", 1), list("b", 2), list("1", 3), list("d", 4), list("2", 5)) rdd3 <- parallelize(sc, l, 2L) sortedRdd3 <- sortByKey(rdd3) - actual <- collect(sortedRdd3) + actual <- collectRDD(sortedRdd3) expect_equal(actual, list(list("1", 3), list("2", 5), list("a", 1), list("b", 2), list("d", 4))) # test on the boundary cases @@ -722,27 +722,27 @@ test_that("sortByKey() on pairwise RDDs", { # boundary case 1: the RDD to be sorted has only 1 partition rdd4 <- parallelize(sc, l, 1L) sortedRdd4 <- sortByKey(rdd4) - actual <- collect(sortedRdd4) + actual <- collectRDD(sortedRdd4) expect_equal(actual, list(list("1", 3), list("2", 5), list("a", 1), list("b", 2), list("d", 4))) # boundary case 2: the sorted RDD has only 1 partition rdd5 <- parallelize(sc, l, 2L) sortedRdd5 <- sortByKey(rdd5, numPartitions = 1L) - actual <- collect(sortedRdd5) + actual <- collectRDD(sortedRdd5) expect_equal(actual, list(list("1", 3), list("2", 5), list("a", 1), list("b", 2), list("d", 4))) # boundary case 3: the RDD to be sorted has only 1 element l2 <- list(list("a", 1)) rdd6 <- parallelize(sc, l2, 2L) sortedRdd6 <- sortByKey(rdd6) - actual <- collect(sortedRdd6) + actual <- collectRDD(sortedRdd6) expect_equal(actual, l2) # boundary case 4: the RDD to be sorted has 0 element l3 <- list() rdd7 <- parallelize(sc, l3, 2L) sortedRdd7 <- sortByKey(rdd7) - actual <- collect(sortedRdd7) + actual <- collectRDD(sortedRdd7) expect_equal(actual, l3) }) @@ -766,7 +766,7 @@ test_that("collectAsMap() on a pairwise RDD", { test_that("show()", { rdd <- parallelize(sc, list(1:10)) - expect_output(show(rdd), "ParallelCollectionRDD\\[\\d+\\] at parallelize at RRDD\\.scala:\\d+") + expect_output(showRDD(rdd), "ParallelCollectionRDD\\[\\d+\\] at parallelize at RRDD\\.scala:\\d+") }) test_that("sampleByKey() on pairwise RDDs", { diff --git a/R/pkg/inst/tests/testthat/test_shuffle.R b/R/pkg/inst/tests/testthat/test_shuffle.R index 2586056773..d38efab0fd 100644 --- a/R/pkg/inst/tests/testthat/test_shuffle.R +++ b/R/pkg/inst/tests/testthat/test_shuffle.R @@ -39,7 +39,7 @@ strListRDD <- parallelize(sc, strList, 4) test_that("groupByKey for integers", { grouped <- groupByKey(intRdd, 2L) - actual <- collect(grouped) + actual <- collectRDD(grouped) expected <- list(list(2L, list(100, 1)), list(1L, list(-1, 200))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -48,7 +48,7 @@ test_that("groupByKey for integers", { test_that("groupByKey for doubles", { grouped <- groupByKey(doubleRdd, 2L) - actual <- collect(grouped) + actual <- collectRDD(grouped) expected <- list(list(1.5, list(-1, 200)), list(2.5, list(100, 1))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -57,7 +57,7 @@ test_that("groupByKey for doubles", { test_that("reduceByKey for ints", { reduced <- reduceByKey(intRdd, "+", 2L) - actual <- collect(reduced) + actual <- collectRDD(reduced) expected <- list(list(2L, 101), list(1L, 199)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -65,7 +65,7 @@ test_that("reduceByKey for ints", { test_that("reduceByKey for doubles", { reduced <- reduceByKey(doubleRdd, "+", 2L) - actual <- collect(reduced) + actual <- collectRDD(reduced) expected <- list(list(1.5, 199), list(2.5, 101)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -74,7 +74,7 @@ test_that("reduceByKey for doubles", { test_that("combineByKey for ints", { reduced <- combineByKey(intRdd, function(x) { x }, "+", "+", 2L) - actual <- collect(reduced) + actual <- collectRDD(reduced) expected <- list(list(2L, 101), list(1L, 199)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -82,7 +82,7 @@ test_that("combineByKey for ints", { test_that("combineByKey for doubles", { reduced <- combineByKey(doubleRdd, function(x) { x }, "+", "+", 2L) - actual <- collect(reduced) + actual <- collectRDD(reduced) expected <- list(list(1.5, 199), list(2.5, 101)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -94,7 +94,7 @@ test_that("combineByKey for characters", { list("other", 3L), list("max", 4L)), 2L) reduced <- combineByKey(stringKeyRDD, function(x) { x }, "+", "+", 2L) - actual <- collect(reduced) + actual <- collectRDD(reduced) expected <- list(list("max", 5L), list("min", 2L), list("other", 3L)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -109,7 +109,7 @@ test_that("aggregateByKey", { combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L) - actual <- collect(aggregatedRDD) + actual <- collectRDD(aggregatedRDD) expected <- list(list(1, list(3, 2)), list(2, list(7, 2))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -122,7 +122,7 @@ test_that("aggregateByKey", { combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L) - actual <- collect(aggregatedRDD) + actual <- collectRDD(aggregatedRDD) expected <- list(list("a", list(3, 2)), list("b", list(7, 2))) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -132,7 +132,7 @@ test_that("foldByKey", { # test foldByKey for int keys folded <- foldByKey(intRdd, 0, "+", 2L) - actual <- collect(folded) + actual <- collectRDD(folded) expected <- list(list(2L, 101), list(1L, 199)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -140,7 +140,7 @@ test_that("foldByKey", { # test foldByKey for double keys folded <- foldByKey(doubleRdd, 0, "+", 2L) - actual <- collect(folded) + actual <- collectRDD(folded) expected <- list(list(1.5, 199), list(2.5, 101)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -151,7 +151,7 @@ test_that("foldByKey", { stringKeyRDD <- parallelize(sc, stringKeyPairs) folded <- foldByKey(stringKeyRDD, 0, "+", 2L) - actual <- collect(folded) + actual <- collectRDD(folded) expected <- list(list("b", 101), list("a", 199)) expect_equal(sortKeyValueList(actual), sortKeyValueList(expected)) @@ -159,14 +159,14 @@ test_that("foldByKey", { # test foldByKey for empty pair RDD rdd <- parallelize(sc, list()) folded <- foldByKey(rdd, 0, "+", 2L) - actual <- collect(folded) + actual <- collectRDD(folded) expected <- list() expect_equal(actual, expected) # test foldByKey for RDD with only 1 pair rdd <- parallelize(sc, list(list(1, 1))) folded <- foldByKey(rdd, 0, "+", 2L) - actual <- collect(folded) + actual <- collectRDD(folded) expected <- list(list(1, 1)) expect_equal(actual, expected) }) @@ -175,7 +175,7 @@ test_that("partitionBy() partitions data correctly", { # Partition by magnitude partitionByMagnitude <- function(key) { if (key >= 3) 1 else 0 } - resultRDD <- partitionBy(numPairsRdd, 2L, partitionByMagnitude) + resultRDD <- partitionByRDD(numPairsRdd, 2L, partitionByMagnitude) expected_first <- list(list(1, 100), list(2, 200)) # key less than 3 expected_second <- list(list(4, -1), list(3, 1), list(3, 0)) # key greater than or equal 3 @@ -191,7 +191,7 @@ test_that("partitionBy works with dependencies", { partitionByParity <- function(key) { if (key %% 2 == kOne) 7 else 4 } # Partition by parity - resultRDD <- partitionBy(numPairsRdd, numPartitions = 2L, partitionByParity) + resultRDD <- partitionByRDD(numPairsRdd, numPartitions = 2L, partitionByParity) # keys even; 100 %% 2 == 0 expected_first <- list(list(2, 200), list(4, -1)) @@ -208,7 +208,7 @@ test_that("test partitionBy with string keys", { words <- flatMap(strListRDD, function(line) { strsplit(line, " ")[[1]] }) wordCount <- lapply(words, function(word) { list(word, 1L) }) - resultRDD <- partitionBy(wordCount, 2L) + resultRDD <- partitionByRDD(wordCount, 2L) expected_first <- list(list("Dexter", 1), list("Dexter", 1)) expected_second <- list(list("and", 1), list("and", 1)) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 39ed4febe5..3ccb8b6d77 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -490,7 +490,7 @@ test_that("read/write json files", { test_that("jsonRDD() on a RDD with json string", { sqlContext <- suppressWarnings(sparkRSQL.init(sc)) rdd <- parallelize(sc, mockLines) - expect_equal(count(rdd), 3) + expect_equal(countRDD(rdd), 3) df <- suppressWarnings(jsonRDD(sqlContext, rdd)) expect_is(df, "SparkDataFrame") expect_equal(count(df), 3) @@ -582,7 +582,7 @@ test_that("toRDD() returns an RRDD", { df <- read.json(jsonPath) testRDD <- toRDD(df) expect_is(testRDD, "RDD") - expect_equal(count(testRDD), 3) + expect_equal(countRDD(testRDD), 3) }) test_that("union on two RDDs created from DataFrames returns an RRDD", { @@ -592,7 +592,7 @@ test_that("union on two RDDs created from DataFrames returns an RRDD", { unioned <- unionRDD(RDD1, RDD2) expect_is(unioned, "RDD") expect_equal(getSerializedMode(unioned), "byte") - expect_equal(collect(unioned)[[2]]$name, "Andy") + expect_equal(collectRDD(unioned)[[2]]$name, "Andy") }) test_that("union on mixed serialization types correctly returns a byte RRDD", { @@ -614,14 +614,14 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", { unionByte <- unionRDD(rdd, dfRDD) expect_is(unionByte, "RDD") expect_equal(getSerializedMode(unionByte), "byte") - expect_equal(collect(unionByte)[[1]], 1) - expect_equal(collect(unionByte)[[12]]$name, "Andy") + expect_equal(collectRDD(unionByte)[[1]], 1) + expect_equal(collectRDD(unionByte)[[12]]$name, "Andy") unionString <- unionRDD(textRDD, dfRDD) expect_is(unionString, "RDD") expect_equal(getSerializedMode(unionString), "byte") - expect_equal(collect(unionString)[[1]], "Michael") - expect_equal(collect(unionString)[[5]]$name, "Andy") + expect_equal(collectRDD(unionString)[[1]], "Michael") + expect_equal(collectRDD(unionString)[[5]]$name, "Andy") }) test_that("objectFile() works with row serialization", { @@ -633,7 +633,7 @@ test_that("objectFile() works with row serialization", { expect_is(objectIn, "RDD") expect_equal(getSerializedMode(objectIn), "byte") - expect_equal(collect(objectIn)[[2]]$age, 30) + expect_equal(collectRDD(objectIn)[[2]]$age, 30) }) test_that("lapply() on a DataFrame returns an RDD with the correct columns", { @@ -643,7 +643,7 @@ test_that("lapply() on a DataFrame returns an RDD with the correct columns", { row }) expect_is(testRDD, "RDD") - collected <- collect(testRDD) + collected <- collectRDD(testRDD) expect_equal(collected[[1]]$name, "Michael") expect_equal(collected[[2]]$newCol, 35) }) @@ -715,10 +715,10 @@ test_that("multiple pipeline transformations result in an RDD with the correct v row }) expect_is(second, "RDD") - expect_equal(count(second), 3) - expect_equal(collect(second)[[2]]$age, 35) - expect_true(collect(second)[[2]]$testCol) - expect_false(collect(second)[[3]]$testCol) + expect_equal(countRDD(second), 3) + expect_equal(collectRDD(second)[[2]]$age, 35) + expect_true(collectRDD(second)[[2]]$testCol) + expect_false(collectRDD(second)[[3]]$testCol) }) test_that("cache(), persist(), and unpersist() on a DataFrame", { @@ -1608,7 +1608,7 @@ test_that("toJSON() returns an RDD of the correct values", { testRDD <- toJSON(df) expect_is(testRDD, "RDD") expect_equal(getSerializedMode(testRDD), "string") - expect_equal(collect(testRDD)[[1]], mockLines[1]) + expect_equal(collectRDD(testRDD)[[1]], mockLines[1]) }) test_that("showDF()", { diff --git a/R/pkg/inst/tests/testthat/test_take.R b/R/pkg/inst/tests/testthat/test_take.R index 07f00c9915..aaa532856c 100644 --- a/R/pkg/inst/tests/testthat/test_take.R +++ b/R/pkg/inst/tests/testthat/test_take.R @@ -36,34 +36,34 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", test_that("take() gives back the original elements in correct count and order", { numVectorRDD <- parallelize(sc, numVector, 10) # case: number of elements to take is less than the size of the first partition - expect_equal(take(numVectorRDD, 1), as.list(head(numVector, n = 1))) + expect_equal(takeRDD(numVectorRDD, 1), as.list(head(numVector, n = 1))) # case: number of elements to take is the same as the size of the first partition - expect_equal(take(numVectorRDD, 11), as.list(head(numVector, n = 11))) + expect_equal(takeRDD(numVectorRDD, 11), as.list(head(numVector, n = 11))) # case: number of elements to take is greater than all elements - expect_equal(take(numVectorRDD, length(numVector)), as.list(numVector)) - expect_equal(take(numVectorRDD, length(numVector) + 1), as.list(numVector)) + expect_equal(takeRDD(numVectorRDD, length(numVector)), as.list(numVector)) + expect_equal(takeRDD(numVectorRDD, length(numVector) + 1), as.list(numVector)) numListRDD <- parallelize(sc, numList, 1) numListRDD2 <- parallelize(sc, numList, 4) - expect_equal(take(numListRDD, 3), take(numListRDD2, 3)) - expect_equal(take(numListRDD, 5), take(numListRDD2, 5)) - expect_equal(take(numListRDD, 1), as.list(head(numList, n = 1))) - expect_equal(take(numListRDD2, 999), numList) + expect_equal(takeRDD(numListRDD, 3), takeRDD(numListRDD2, 3)) + expect_equal(takeRDD(numListRDD, 5), takeRDD(numListRDD2, 5)) + expect_equal(takeRDD(numListRDD, 1), as.list(head(numList, n = 1))) + expect_equal(takeRDD(numListRDD2, 999), numList) strVectorRDD <- parallelize(sc, strVector, 2) strVectorRDD2 <- parallelize(sc, strVector, 3) - expect_equal(take(strVectorRDD, 4), as.list(strVector)) - expect_equal(take(strVectorRDD2, 2), as.list(head(strVector, n = 2))) + expect_equal(takeRDD(strVectorRDD, 4), as.list(strVector)) + expect_equal(takeRDD(strVectorRDD2, 2), as.list(head(strVector, n = 2))) strListRDD <- parallelize(sc, strList, 4) strListRDD2 <- parallelize(sc, strList, 1) - expect_equal(take(strListRDD, 3), as.list(head(strList, n = 3))) - expect_equal(take(strListRDD2, 1), as.list(head(strList, n = 1))) + expect_equal(takeRDD(strListRDD, 3), as.list(head(strList, n = 3))) + expect_equal(takeRDD(strListRDD2, 1), as.list(head(strList, n = 1))) - expect_equal(length(take(strListRDD, 0)), 0) - expect_equal(length(take(strVectorRDD, 0)), 0) - expect_equal(length(take(numListRDD, 0)), 0) - expect_equal(length(take(numVectorRDD, 0)), 0) + expect_equal(length(takeRDD(strListRDD, 0)), 0) + expect_equal(length(takeRDD(strVectorRDD, 0)), 0) + expect_equal(length(takeRDD(numListRDD, 0)), 0) + expect_equal(length(takeRDD(numVectorRDD, 0)), 0) }) sparkR.session.stop() diff --git a/R/pkg/inst/tests/testthat/test_textFile.R b/R/pkg/inst/tests/testthat/test_textFile.R index b7dcbe472a..3b466066e9 100644 --- a/R/pkg/inst/tests/testthat/test_textFile.R +++ b/R/pkg/inst/tests/testthat/test_textFile.R @@ -29,8 +29,8 @@ test_that("textFile() on a local file returns an RDD", { rdd <- textFile(sc, fileName) expect_is(rdd, "RDD") - expect_true(count(rdd) > 0) - expect_equal(count(rdd), 2) + expect_true(countRDD(rdd) > 0) + expect_equal(countRDD(rdd), 2) unlink(fileName) }) @@ -40,7 +40,7 @@ test_that("textFile() followed by a collect() returns the same content", { writeLines(mockFile, fileName) rdd <- textFile(sc, fileName) - expect_equal(collect(rdd), as.list(mockFile)) + expect_equal(collectRDD(rdd), as.list(mockFile)) unlink(fileName) }) @@ -55,7 +55,7 @@ test_that("textFile() word count works as expected", { wordCount <- lapply(words, function(word) { list(word, 1L) }) counts <- reduceByKey(wordCount, "+", 2L) - output <- collect(counts) + output <- collectRDD(counts) expected <- list(list("pretty.", 1), list("is", 2), list("awesome.", 1), list("Spark", 2)) expect_equal(sortKeyValueList(output), sortKeyValueList(expected)) @@ -72,7 +72,7 @@ test_that("several transformations on RDD created by textFile()", { # PipelinedRDD initially created from RDD rdd <- lapply(rdd, function(x) paste(x, x)) } - collect(rdd) + collectRDD(rdd) unlink(fileName) }) @@ -85,7 +85,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content", rdd <- textFile(sc, fileName1, 1L) saveAsTextFile(rdd, fileName2) rdd <- textFile(sc, fileName2) - expect_equal(collect(rdd), as.list(mockFile)) + expect_equal(collectRDD(rdd), as.list(mockFile)) unlink(fileName1) unlink(fileName2) @@ -97,7 +97,7 @@ test_that("saveAsTextFile() on a parallelized list works as expected", { rdd <- parallelize(sc, l, 1L) saveAsTextFile(rdd, fileName) rdd <- textFile(sc, fileName) - expect_equal(collect(rdd), lapply(l, function(x) {toString(x)})) + expect_equal(collectRDD(rdd), lapply(l, function(x) {toString(x)})) unlink(fileName) }) @@ -117,7 +117,7 @@ test_that("textFile() and saveAsTextFile() word count works as expected", { saveAsTextFile(counts, fileName2) rdd <- textFile(sc, fileName2) - output <- collect(rdd) + output <- collectRDD(rdd) expected <- list(list("awesome.", 1), list("Spark", 2), list("pretty.", 1), list("is", 2)) expectedStr <- lapply(expected, function(x) { toString(x) }) @@ -134,7 +134,7 @@ test_that("textFile() on multiple paths", { writeLines("Spark is awesome.", fileName2) rdd <- textFile(sc, c(fileName1, fileName2)) - expect_equal(count(rdd), 2) + expect_equal(countRDD(rdd), 2) unlink(fileName1) unlink(fileName2) @@ -147,16 +147,16 @@ test_that("Pipelined operations on RDDs created using textFile", { rdd <- textFile(sc, fileName) lengths <- lapply(rdd, function(x) { length(x) }) - expect_equal(collect(lengths), list(1, 1)) + expect_equal(collectRDD(lengths), list(1, 1)) lengthsPipelined <- lapply(lengths, function(x) { x + 10 }) - expect_equal(collect(lengthsPipelined), list(11, 11)) + expect_equal(collectRDD(lengthsPipelined), list(11, 11)) lengths30 <- lapply(lengthsPipelined, function(x) { x + 20 }) - expect_equal(collect(lengths30), list(31, 31)) + expect_equal(collectRDD(lengths30), list(31, 31)) lengths20 <- lapply(lengths, function(x) { x + 20 }) - expect_equal(collect(lengths20), list(21, 21)) + expect_equal(collectRDD(lengths20), list(21, 21)) unlink(fileName) }) diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R index 58ff3debfa..83e94a1432 100644 --- a/R/pkg/inst/tests/testthat/test_utils.R +++ b/R/pkg/inst/tests/testthat/test_utils.R @@ -24,7 +24,7 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", test_that("convertJListToRList() gives back (deserializes) the original JLists of strings and integers", { # It's hard to manually create a Java List using rJava, since it does not - # support generics well. Instead, we rely on collect() returning a + # support generics well. Instead, we rely on collectRDD() returning a # JList. nums <- as.list(1:10) rdd <- parallelize(sc, nums, 1L) @@ -48,7 +48,7 @@ test_that("serializeToBytes on RDD", { text.rdd <- textFile(sc, fileName) expect_equal(getSerializedMode(text.rdd), "string") ser.rdd <- serializeToBytes(text.rdd) - expect_equal(collect(ser.rdd), as.list(mockFile)) + expect_equal(collectRDD(ser.rdd), as.list(mockFile)) expect_equal(getSerializedMode(ser.rdd), "byte") unlink(fileName) @@ -128,7 +128,7 @@ test_that("cleanClosure on R functions", { env <- environment(newF) expect_equal(ls(env), "t") expect_equal(get("t", envir = env, inherits = FALSE), t) - actual <- collect(lapply(rdd, f)) + actual <- collectRDD(lapply(rdd, f)) expected <- as.list(c(rep(FALSE, 4), rep(TRUE, 6))) expect_equal(actual, expected) -- cgit v1.2.3