author     Yu ISHIKAWA <yuu.ishikawa@gmail.com>             2015-06-22 20:55:38 -0700
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-06-22 20:55:38 -0700
commit     44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3 (patch)
tree       b90c21d783c6de5b4195c42d5b387deec41e299e
parent     c4d2343966cbae40a8271a2e6cad66227d2f8249 (diff)
download   spark-44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3.tar.gz
           spark-44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3.tar.bz2
           spark-44fa7df64daa55bd6eb1f2c219a9701b34e1c2a3.zip
[SPARK-8548] [SPARKR] Remove the trailing whitespaces from the SparkR files
[[SPARK-8548] Remove the trailing whitespaces from the SparkR files - ASF JIRA](https://issues.apache.org/jira/browse/SPARK-8548)

This is the result of `lint-r`: https://gist.github.com/yu-iskw/0019b37a2c1167f33986

Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #6945 from yu-iskw/SPARK-8548 and squashes the following commits:

0bd567a [Yu ISHIKAWA] [SPARK-8548][SparkR] Remove the trailing whitespaces from the SparkR files
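As a rough illustration (not part of this commit), trailing whitespace of the kind removed below can be located with a few lines of base R; the authoritative check is the `lint-r` run linked above, and the "R/pkg" path is an assumption based on this repository's layout:

    # Sketch: list R files under R/pkg that still contain trailing whitespace.
    files <- list.files("R/pkg", pattern = "\\.R$", recursive = TRUE, full.names = TRUE)
    has_trailing_ws <- function(f) any(grepl("[ \t]+$", readLines(f, warn = FALSE)))
    print(Filter(has_trailing_ws, files))  # prints character(0) once the files are clean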
-rw-r--r--  R/pkg/R/DataFrame.R                        96
-rw-r--r--  R/pkg/R/RDD.R                              48
-rw-r--r--  R/pkg/R/SQLContext.R                       14
-rw-r--r--  R/pkg/R/broadcast.R                         6
-rw-r--r--  R/pkg/R/deserialize.R                       2
-rw-r--r--  R/pkg/R/generics.R                         15
-rw-r--r--  R/pkg/R/group.R                             1
-rw-r--r--  R/pkg/R/jobj.R                              2
-rw-r--r--  R/pkg/R/pairRDD.R                           4
-rw-r--r--  R/pkg/R/schema.R                            2
-rw-r--r--  R/pkg/R/serialize.R                         2
-rw-r--r--  R/pkg/R/sparkR.R                            6
-rw-r--r--  R/pkg/R/utils.R                            48
-rw-r--r--  R/pkg/R/zzz.R                               1
-rw-r--r--  R/pkg/inst/tests/test_binaryFile.R          7
-rw-r--r--  R/pkg/inst/tests/test_binary_function.R    28
-rw-r--r--  R/pkg/inst/tests/test_rdd.R                12
-rw-r--r--  R/pkg/inst/tests/test_shuffle.R            28
-rw-r--r--  R/pkg/inst/tests/test_sparkSQL.R           28
-rw-r--r--  R/pkg/inst/tests/test_take.R                1
-rw-r--r--  R/pkg/inst/tests/test_textFile.R            7
-rw-r--r--  R/pkg/inst/tests/test_utils.R              12
22 files changed, 182 insertions, 188 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0af5cb8881..6feabf4189 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -38,7 +38,7 @@ setClass("DataFrame",
setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
.Object@env <- new.env()
.Object@env$isCached <- isCached
-
+
.Object@sdf <- sdf
.Object
})
@@ -55,11 +55,11 @@ dataFrame <- function(sdf, isCached = FALSE) {
############################ DataFrame Methods ##############################################
#' Print Schema of a DataFrame
-#'
+#'
#' Prints out the schema in tree format
-#'
+#'
#' @param x A SparkSQL DataFrame
-#'
+#'
#' @rdname printSchema
#' @export
#' @examples
@@ -78,11 +78,11 @@ setMethod("printSchema",
})
#' Get schema object
-#'
+#'
#' Returns the schema of this DataFrame as a structType object.
-#'
+#'
#' @param x A SparkSQL DataFrame
-#'
+#'
#' @rdname schema
#' @export
#' @examples
@@ -100,9 +100,9 @@ setMethod("schema",
})
#' Explain
-#'
+#'
#' Print the logical and physical Catalyst plans to the console for debugging.
-#'
+#'
#' @param x A SparkSQL DataFrame
#' @param extended Logical. If extended is False, explain() only prints the physical plan.
#' @rdname explain
@@ -200,11 +200,11 @@ setMethod("show", "DataFrame",
})
#' DataTypes
-#'
+#'
#' Return all column names and their data types as a list
-#'
+#'
#' @param x A SparkSQL DataFrame
-#'
+#'
#' @rdname dtypes
#' @export
#' @examples
@@ -224,11 +224,11 @@ setMethod("dtypes",
})
#' Column names
-#'
+#'
#' Return all column names as a list
-#'
+#'
#' @param x A SparkSQL DataFrame
-#'
+#'
#' @rdname columns
#' @export
#' @examples
@@ -256,12 +256,12 @@ setMethod("names",
})
#' Register Temporary Table
-#'
+#'
#' Registers a DataFrame as a Temporary Table in the SQLContext
-#'
+#'
#' @param x A SparkSQL DataFrame
#' @param tableName A character vector containing the name of the table
-#'
+#'
#' @rdname registerTempTable
#' @export
#' @examples
@@ -306,11 +306,11 @@ setMethod("insertInto",
})
#' Cache
-#'
+#'
#' Persist with the default storage level (MEMORY_ONLY).
-#'
+#'
#' @param x A SparkSQL DataFrame
-#'
+#'
#' @rdname cache-methods
#' @export
#' @examples
@@ -400,7 +400,7 @@ setMethod("repartition",
signature(x = "DataFrame", numPartitions = "numeric"),
function(x, numPartitions) {
sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
- dataFrame(sdf)
+ dataFrame(sdf)
})
# toJSON
@@ -489,7 +489,7 @@ setMethod("distinct",
#' sqlContext <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
-#' collect(sample(df, FALSE, 0.5))
+#' collect(sample(df, FALSE, 0.5))
#' collect(sample(df, TRUE, 0.5))
#'}
setMethod("sample",
@@ -513,11 +513,11 @@ setMethod("sample_frac",
})
#' Count
-#'
+#'
#' Returns the number of rows in a DataFrame
-#'
+#'
#' @param x A SparkSQL DataFrame
-#'
+#'
#' @rdname count
#' @export
#' @examples
@@ -568,13 +568,13 @@ setMethod("collect",
})
#' Limit
-#'
+#'
#' Limit the resulting DataFrame to the number of rows specified.
-#'
+#'
#' @param x A SparkSQL DataFrame
#' @param num The number of rows to return
#' @return A new DataFrame containing the number of rows specified.
-#'
+#'
#' @rdname limit
#' @export
#' @examples
@@ -593,7 +593,7 @@ setMethod("limit",
})
#' Take the first NUM rows of a DataFrame and return a the results as a data.frame
-#'
+#'
#' @rdname take
#' @export
#' @examples
@@ -613,8 +613,8 @@ setMethod("take",
#' Head
#'
-#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
-#' then head() returns the first 6 rows in keeping with the current data.frame
+#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
+#' then head() returns the first 6 rows in keeping with the current data.frame
#' convention in R.
#'
#' @param x A SparkSQL DataFrame
@@ -659,11 +659,11 @@ setMethod("first",
})
# toRDD()
-#
+#
# Converts a Spark DataFrame to an RDD while preserving column names.
-#
+#
# @param x A Spark DataFrame
-#
+#
# @rdname DataFrame
# @export
# @examples
@@ -1167,7 +1167,7 @@ setMethod("where",
#'
#' @param x A Spark DataFrame
#' @param y A Spark DataFrame
-#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
+#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
#' Column expression. If joinExpr is omitted, join() wil perform a Cartesian join
#' @param joinType The type of join to perform. The following join types are available:
#' 'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'. The default joinType is "inner".
@@ -1303,7 +1303,7 @@ setMethod("except",
#' @param source A name for external data source
#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
#'
-#' @rdname write.df
+#' @rdname write.df
#' @export
#' @examples
#'\dontrun{
@@ -1401,7 +1401,7 @@ setMethod("saveAsTable",
#' @param col A string of name
#' @param ... Additional expressions
#' @return A DataFrame
-#' @rdname describe
+#' @rdname describe
#' @export
#' @examples
#'\dontrun{
@@ -1444,7 +1444,7 @@ setMethod("describe",
#' This overwrites the how parameter.
#' @param cols Optional list of column names to consider.
#' @return A DataFrame
-#'
+#'
#' @rdname nafunctions
#' @export
#' @examples
@@ -1465,7 +1465,7 @@ setMethod("dropna",
if (is.null(minNonNulls)) {
minNonNulls <- if (how == "any") { length(cols) } else { 1 }
}
-
+
naFunctions <- callJMethod(x@sdf, "na")
sdf <- callJMethod(naFunctions, "drop",
as.integer(minNonNulls), listToSeq(as.list(cols)))
@@ -1488,16 +1488,16 @@ setMethod("na.omit",
#' @param value Value to replace null values with.
#' Should be an integer, numeric, character or named list.
#' If the value is a named list, then cols is ignored and
-#' value must be a mapping from column name (character) to
+#' value must be a mapping from column name (character) to
#' replacement value. The replacement value must be an
#' integer, numeric or character.
#' @param cols optional list of column names to consider.
#' Columns specified in cols that do not have matching data
-#' type are ignored. For example, if value is a character, and
+#' type are ignored. For example, if value is a character, and
#' subset contains a non-character column, then the non-character
#' column is simply ignored.
#' @return A DataFrame
-#'
+#'
#' @rdname nafunctions
#' @export
#' @examples
@@ -1515,14 +1515,14 @@ setMethod("fillna",
if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
stop("value should be an integer, numeric, charactor or named list.")
}
-
+
if (class(value) == "list") {
# Check column names in the named list
colNames <- names(value)
if (length(colNames) == 0 || !all(colNames != "")) {
stop("value should be an a named list with each name being a column name.")
}
-
+
# Convert to the named list to an environment to be passed to JVM
valueMap <- new.env()
for (col in colNames) {
@@ -1533,19 +1533,19 @@ setMethod("fillna",
}
valueMap[[col]] <- v
}
-
+
# When value is a named list, caller is expected not to pass in cols
if (!is.null(cols)) {
warning("When value is a named list, cols is ignored!")
cols <- NULL
}
-
+
value <- valueMap
} else if (is.integer(value)) {
# Cast an integer to a numeric
value <- as.numeric(value)
}
-
+
naFunctions <- callJMethod(x@sdf, "na")
sdf <- if (length(cols) == 0) {
callJMethod(naFunctions, "fill", value)
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 0513299515..89511141d3 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -48,7 +48,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
# byte: The RDD stores data serialized in R.
# string: The RDD stores data as strings.
# row: The RDD stores the serialized rows of a DataFrame.
-
+
# We use an environment to store mutable states inside an RDD object.
# Note that R's call-by-value semantics makes modifying slots inside an
# object (passed as an argument into a function, such as cache()) difficult:
@@ -363,7 +363,7 @@ setMethod("collectPartition",
# @description
# \code{collectAsMap} returns a named list as a map that contains all of the elements
-# in a key-value pair RDD.
+# in a key-value pair RDD.
# @examples
#\dontrun{
# sc <- sparkR.init()
@@ -666,7 +666,7 @@ setMethod("minimum",
# rdd <- parallelize(sc, 1:10)
# sumRDD(rdd) # 55
#}
-# @rdname sumRDD
+# @rdname sumRDD
# @aliases sumRDD,RDD
setMethod("sumRDD",
signature(x = "RDD"),
@@ -1090,11 +1090,11 @@ setMethod("sortBy",
# Return:
# A list of the first N elements from the RDD in the specified order.
#
-takeOrderedElem <- function(x, num, ascending = TRUE) {
+takeOrderedElem <- function(x, num, ascending = TRUE) {
if (num <= 0L) {
return(list())
}
-
+
partitionFunc <- function(part) {
if (num < length(part)) {
# R limitation: order works only on primitive types!
@@ -1152,7 +1152,7 @@ takeOrderedElem <- function(x, num, ascending = TRUE) {
# @aliases takeOrdered,RDD,RDD-method
setMethod("takeOrdered",
signature(x = "RDD", num = "integer"),
- function(x, num) {
+ function(x, num) {
takeOrderedElem(x, num)
})
@@ -1173,7 +1173,7 @@ setMethod("takeOrdered",
# @aliases top,RDD,RDD-method
setMethod("top",
signature(x = "RDD", num = "integer"),
- function(x, num) {
+ function(x, num) {
takeOrderedElem(x, num, FALSE)
})
@@ -1181,7 +1181,7 @@ setMethod("top",
#
# Aggregate the elements of each partition, and then the results for all the
# partitions, using a given associative function and a neutral "zero value".
-#
+#
# @param x An RDD.
# @param zeroValue A neutral "zero value".
# @param op An associative function for the folding operation.
@@ -1207,7 +1207,7 @@ setMethod("fold",
#
# Aggregate the elements of each partition, and then the results for all the
# partitions, using given combine functions and a neutral "zero value".
-#
+#
# @param x An RDD.
# @param zeroValue A neutral "zero value".
# @param seqOp A function to aggregate the RDD elements. It may return a different
@@ -1230,11 +1230,11 @@ setMethod("fold",
# @aliases aggregateRDD,RDD,RDD-method
setMethod("aggregateRDD",
signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY"),
- function(x, zeroValue, seqOp, combOp) {
+ function(x, zeroValue, seqOp, combOp) {
partitionFunc <- function(part) {
Reduce(seqOp, part, zeroValue)
}
-
+
partitionList <- collect(lapplyPartition(x, partitionFunc),
flatten = FALSE)
Reduce(combOp, partitionList, zeroValue)
@@ -1330,7 +1330,7 @@ setMethod("setName",
#\dontrun{
# sc <- sparkR.init()
# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
-# collect(zipWithUniqueId(rdd))
+# collect(zipWithUniqueId(rdd))
# # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2))
#}
# @rdname zipWithUniqueId
@@ -1426,7 +1426,7 @@ setMethod("glom",
partitionFunc <- function(part) {
list(part)
}
-
+
lapplyPartition(x, partitionFunc)
})
@@ -1498,16 +1498,16 @@ setMethod("zipRDD",
# The jrdd's elements are of scala Tuple2 type. The serialized
# flag here is used for the elements inside the tuples.
rdd <- RDD(jrdd, getSerializedMode(rdds[[1]]))
-
+
mergePartitions(rdd, TRUE)
})
# Cartesian product of this RDD and another one.
#
-# Return the Cartesian product of this RDD and another one,
-# that is, the RDD of all pairs of elements (a, b) where a
+# Return the Cartesian product of this RDD and another one,
+# that is, the RDD of all pairs of elements (a, b) where a
# is in this and b is in other.
-#
+#
# @param x An RDD.
# @param other An RDD.
# @return A new RDD which is the Cartesian product of these two RDDs.
@@ -1515,7 +1515,7 @@ setMethod("zipRDD",
#\dontrun{
# sc <- sparkR.init()
# rdd <- parallelize(sc, 1:2)
-# sortByKey(cartesian(rdd, rdd))
+# sortByKey(cartesian(rdd, rdd))
# # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2))
#}
# @rdname cartesian
@@ -1528,7 +1528,7 @@ setMethod("cartesian",
# The jrdd's elements are of scala Tuple2 type. The serialized
# flag here is used for the elements inside the tuples.
rdd <- RDD(jrdd, getSerializedMode(rdds[[1]]))
-
+
mergePartitions(rdd, FALSE)
})
@@ -1598,11 +1598,11 @@ setMethod("intersection",
# Zips an RDD's partitions with one (or more) RDD(s).
# Same as zipPartitions in Spark.
-#
+#
# @param ... RDDs to be zipped.
# @param func A function to transform zipped partitions.
-# @return A new RDD by applying a function to the zipped partitions.
-# Assumes that all the RDDs have the *same number of partitions*, but
+# @return A new RDD by applying a function to the zipped partitions.
+# Assumes that all the RDDs have the *same number of partitions*, but
# does *not* require them to have the same number of elements in each partition.
# @examples
#\dontrun{
@@ -1610,7 +1610,7 @@ setMethod("intersection",
# rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2
# rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4
# rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6
-# collect(zipPartitions(rdd1, rdd2, rdd3,
+# collect(zipPartitions(rdd1, rdd2, rdd3,
# func = function(x, y, z) { list(list(x, y, z))} ))
# # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6)))
#}
@@ -1627,7 +1627,7 @@ setMethod("zipPartitions",
if (length(unique(nPart)) != 1) {
stop("Can only zipPartitions RDDs which have the same number of partitions.")
}
-
+
rrdds <- lapply(rrdds, function(rdd) {
mapPartitionsWithIndex(rdd, function(partIndex, part) {
print(length(part))
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 22a4b5bf86..9a743a3411 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -182,7 +182,7 @@ setMethod("toDF", signature(x = "RDD"),
#' Create a DataFrame from a JSON file.
#'
-#' Loads a JSON file (one object per line), returning the result as a DataFrame
+#' Loads a JSON file (one object per line), returning the result as a DataFrame
#' It goes through the entire dataset once to determine the schema.
#'
#' @param sqlContext SQLContext to use
@@ -238,7 +238,7 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
#' Create a DataFrame from a Parquet file.
-#'
+#'
#' Loads a Parquet file, returning the result as a DataFrame.
#'
#' @param sqlContext SQLContext to use
@@ -278,7 +278,7 @@ sql <- function(sqlContext, sqlQuery) {
}
#' Create a DataFrame from a SparkSQL Table
-#'
+#'
#' Returns the specified Table as a DataFrame. The Table must have already been registered
#' in the SQLContext.
#'
@@ -298,7 +298,7 @@ sql <- function(sqlContext, sqlQuery) {
table <- function(sqlContext, tableName) {
sdf <- callJMethod(sqlContext, "table", tableName)
- dataFrame(sdf)
+ dataFrame(sdf)
}
@@ -352,7 +352,7 @@ tableNames <- function(sqlContext, databaseName = NULL) {
#' Cache Table
-#'
+#'
#' Caches the specified table in-memory.
#'
#' @param sqlContext SQLContext to use
@@ -370,11 +370,11 @@ tableNames <- function(sqlContext, databaseName = NULL) {
#' }
cacheTable <- function(sqlContext, tableName) {
- callJMethod(sqlContext, "cacheTable", tableName)
+ callJMethod(sqlContext, "cacheTable", tableName)
}
#' Uncache Table
-#'
+#'
#' Removes the specified table from the in-memory cache.
#'
#' @param sqlContext SQLContext to use
diff --git a/R/pkg/R/broadcast.R b/R/pkg/R/broadcast.R
index 23dc387807..2403925b26 100644
--- a/R/pkg/R/broadcast.R
+++ b/R/pkg/R/broadcast.R
@@ -27,9 +27,9 @@
# @description Broadcast variables can be created using the broadcast
# function from a \code{SparkContext}.
# @rdname broadcast-class
-# @seealso broadcast
+# @seealso broadcast
#
-# @param id Id of the backing Spark broadcast variable
+# @param id Id of the backing Spark broadcast variable
# @export
setClass("Broadcast", slots = list(id = "character"))
@@ -68,7 +68,7 @@ setMethod("value",
# variable on workers. Not intended for use outside the package.
#
# @rdname broadcast-internal
-# @seealso broadcast, value
+# @seealso broadcast, value
# @param bcastId The id of broadcast variable to set
# @param value The value to be set
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 257b435607..d961bbc383 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -18,7 +18,7 @@
# Utility functions to deserialize objects from Java.
# Type mapping from Java to R
-#
+#
# void -> NULL
# Int -> integer
# String -> character
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 12e09176c9..79055b7f18 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -130,7 +130,7 @@ setGeneric("maximum", function(x) { standardGeneric("maximum") })
# @export
setGeneric("minimum", function(x) { standardGeneric("minimum") })
-# @rdname sumRDD
+# @rdname sumRDD
# @export
setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })
@@ -219,7 +219,7 @@ setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") })
# @rdname zipRDD
# @export
-setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
+setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") },
signature = "...")
# @rdname zipWithIndex
@@ -364,7 +364,7 @@ setGeneric("subtract",
# @rdname subtractByKey
# @export
-setGeneric("subtractByKey",
+setGeneric("subtractByKey",
function(x, other, numPartitions = 1) {
standardGeneric("subtractByKey")
})
@@ -399,15 +399,15 @@ setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
#' @rdname nafunctions
#' @export
setGeneric("dropna",
- function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
- standardGeneric("dropna")
+ function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+ standardGeneric("dropna")
})
#' @rdname nafunctions
#' @export
setGeneric("na.omit",
- function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
- standardGeneric("na.omit")
+ function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+ standardGeneric("na.omit")
})
#' @rdname schema
@@ -656,4 +656,3 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") })
#' @rdname column
#' @export
setGeneric("upper", function(x) { standardGeneric("upper") })
-
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index b758481997..8f1c68f7c4 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -136,4 +136,3 @@ createMethods <- function() {
}
createMethods()
-
diff --git a/R/pkg/R/jobj.R b/R/pkg/R/jobj.R
index a8a25230b6..0838a7bb35 100644
--- a/R/pkg/R/jobj.R
+++ b/R/pkg/R/jobj.R
@@ -16,7 +16,7 @@
#
# References to objects that exist on the JVM backend
-# are maintained using the jobj.
+# are maintained using the jobj.
#' @include generics.R
NULL
diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R
index 1e24286dbc..7f902ba8e6 100644
--- a/R/pkg/R/pairRDD.R
+++ b/R/pkg/R/pairRDD.R
@@ -784,7 +784,7 @@ setMethod("sortByKey",
newRDD <- partitionBy(x, numPartitions, rangePartitionFunc)
lapplyPartition(newRDD, partitionFunc)
})
-
+
# Subtract a pair RDD with another pair RDD.
#
# Return an RDD with the pairs from x whose keys are not in other.
@@ -820,7 +820,7 @@ setMethod("subtractByKey",
})
# Return a subset of this RDD sampled by key.
-#
+#
# @description
# \code{sampleByKey} Create a sample of this RDD using variable sampling rates
# for different keys as specified by fractions, a key to sampling rate map.
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index e442119086..15e2bdbd55 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -20,7 +20,7 @@
#' structType
#'
-#' Create a structType object that contains the metadata for a DataFrame. Intended for
+#' Create a structType object that contains the metadata for a DataFrame. Intended for
#' use with createDataFrame and toDF.
#'
#' @param x a structField object (created with the field() function)
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 3169d7968f..78535eff0d 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -175,7 +175,7 @@ writeGenericList <- function(con, list) {
writeObject(con, elem)
}
}
-
+
# Used to pass in hash maps required on Java side.
writeEnv <- function(con, env) {
len <- length(env)
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 2efd4f0742..dbde0c44c5 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -43,7 +43,7 @@ sparkR.stop <- function() {
callJMethod(sc, "stop")
rm(".sparkRjsc", envir = env)
}
-
+
if (exists(".backendLaunched", envir = env)) {
callJStatic("SparkRHandler", "stopBackend")
}
@@ -174,7 +174,7 @@ sparkR.init <- function(
for (varname in names(sparkEnvir)) {
sparkEnvirMap[[varname]] <- sparkEnvir[[varname]]
}
-
+
sparkExecutorEnvMap <- new.env()
if (!any(names(sparkExecutorEnv) == "LD_LIBRARY_PATH")) {
sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
@@ -214,7 +214,7 @@ sparkR.init <- function(
#' Initialize a new SQLContext.
#'
-#' This function creates a SparkContext from an existing JavaSparkContext and
+#' This function creates a SparkContext from an existing JavaSparkContext and
#' then uses it to initialize a new SQLContext
#'
#' @param jsc The existing JavaSparkContext created with SparkR.init()
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 69b2700191..13cec0f712 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -368,21 +368,21 @@ listToSeq <- function(l) {
}
# Utility function to recursively traverse the Abstract Syntax Tree (AST) of a
-# user defined function (UDF), and to examine variables in the UDF to decide
+# user defined function (UDF), and to examine variables in the UDF to decide
# if their values should be included in the new function environment.
# param
# node The current AST node in the traversal.
# oldEnv The original function environment.
# defVars An Accumulator of variables names defined in the function's calling environment,
# including function argument and local variable names.
-# checkedFunc An environment of function objects examined during cleanClosure. It can
+# checkedFunc An environment of function objects examined during cleanClosure. It can
# be considered as a "name"-to-"list of functions" mapping.
# newEnv A new function environment to store necessary function dependencies, an output argument.
processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
nodeLen <- length(node)
-
+
if (nodeLen > 1 && typeof(node) == "language") {
- # Recursive case: current AST node is an internal node, check for its children.
+ # Recursive case: current AST node is an internal node, check for its children.
if (length(node[[1]]) > 1) {
for (i in 1:nodeLen) {
processClosure(node[[i]], oldEnv, defVars, checkedFuncs, newEnv)
@@ -393,7 +393,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
for (i in 2:nodeLen) {
processClosure(node[[i]], oldEnv, defVars, checkedFuncs, newEnv)
}
- } else if (nodeChar == "<-" || nodeChar == "=" ||
+ } else if (nodeChar == "<-" || nodeChar == "=" ||
nodeChar == "<<-") { # Assignment Ops.
defVar <- node[[2]]
if (length(defVar) == 1 && typeof(defVar) == "symbol") {
@@ -422,21 +422,21 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
}
}
}
- } else if (nodeLen == 1 &&
+ } else if (nodeLen == 1 &&
(typeof(node) == "symbol" || typeof(node) == "language")) {
# Base case: current AST node is a leaf node and a symbol or a function call.
nodeChar <- as.character(node)
if (!nodeChar %in% defVars$data) { # Not a function parameter or local variable.
func.env <- oldEnv
topEnv <- parent.env(.GlobalEnv)
- # Search in function environment, and function's enclosing environments
+ # Search in function environment, and function's enclosing environments
# up to global environment. There is no need to look into package environments
- # above the global or namespace environment that is not SparkR below the global,
+ # above the global or namespace environment that is not SparkR below the global,
# as they are assumed to be loaded on workers.
while (!identical(func.env, topEnv)) {
# Namespaces other than "SparkR" will not be searched.
- if (!isNamespace(func.env) ||
- (getNamespaceName(func.env) == "SparkR" &&
+ if (!isNamespace(func.env) ||
+ (getNamespaceName(func.env) == "SparkR" &&
!(nodeChar %in% getNamespaceExports("SparkR")))) { # Only include SparkR internals.
# Set parameter 'inherits' to FALSE since we do not need to search in
# attached package environments.
@@ -444,7 +444,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
error = function(e) { FALSE })) {
obj <- get(nodeChar, envir = func.env, inherits = FALSE)
if (is.function(obj)) { # If the node is a function call.
- funcList <- mget(nodeChar, envir = checkedFuncs, inherits = F,
+ funcList <- mget(nodeChar, envir = checkedFuncs, inherits = F,
ifnotfound = list(list(NULL)))[[1]]
found <- sapply(funcList, function(func) {
ifelse(identical(func, obj), TRUE, FALSE)
@@ -453,7 +453,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
break
}
# Function has not been examined, record it and recursively clean its closure.
- assign(nodeChar,
+ assign(nodeChar,
if (is.null(funcList[[1]])) {
list(obj)
} else {
@@ -466,7 +466,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
break
}
}
-
+
# Continue to search in enclosure.
func.env <- parent.env(func.env)
}
@@ -474,8 +474,8 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
}
}
-# Utility function to get user defined function (UDF) dependencies (closure).
-# More specifically, this function captures the values of free variables defined
+# Utility function to get user defined function (UDF) dependencies (closure).
+# More specifically, this function captures the values of free variables defined
# outside a UDF, and stores them in the function's environment.
# param
# func A function whose closure needs to be captured.
@@ -488,7 +488,7 @@ cleanClosure <- function(func, checkedFuncs = new.env()) {
newEnv <- new.env(parent = .GlobalEnv)
func.body <- body(func)
oldEnv <- environment(func)
- # defVars is an Accumulator of variables names defined in the function's calling
+ # defVars is an Accumulator of variables names defined in the function's calling
# environment. First, function's arguments are added to defVars.
defVars <- initAccumulator()
argNames <- names(as.list(args(func)))
@@ -509,15 +509,15 @@ cleanClosure <- function(func, checkedFuncs = new.env()) {
# return value
# A list of two result RDDs.
appendPartitionLengths <- function(x, other) {
- if (getSerializedMode(x) != getSerializedMode(other) ||
+ if (getSerializedMode(x) != getSerializedMode(other) ||
getSerializedMode(x) == "byte") {
# Append the number of elements in each partition to that partition so that we can later
# know the boundary of elements from x and other.
#
- # Note that this appending also serves the purpose of reserialization, because even if
+ # Note that this appending also serves the purpose of reserialization, because even if
# any RDD is serialized, we need to reserialize it to make sure its partitions are encoded
# as a single byte array. For example, partitions of an RDD generated from partitionBy()
- # may be encoded as multiple byte arrays.
+ # may be encoded as multiple byte arrays.
appendLength <- function(part) {
len <- length(part)
part[[len + 1]] <- len + 1
@@ -544,23 +544,23 @@ mergePartitions <- function(rdd, zip) {
lengthOfValues <- part[[len]]
lengthOfKeys <- part[[len - lengthOfValues]]
stopifnot(len == lengthOfKeys + lengthOfValues)
-
+
# For zip operation, check if corresponding partitions of both RDDs have the same number of elements.
if (zip && lengthOfKeys != lengthOfValues) {
stop("Can only zip RDDs with same number of elements in each pair of corresponding partitions.")
}
-
+
if (lengthOfKeys > 1) {
keys <- part[1 : (lengthOfKeys - 1)]
} else {
keys <- list()
}
if (lengthOfValues > 1) {
- values <- part[(lengthOfKeys + 1) : (len - 1)]
+ values <- part[(lengthOfKeys + 1) : (len - 1)]
} else {
values <- list()
}
-
+
if (!zip) {
return(mergeCompactLists(keys, values))
}
@@ -578,6 +578,6 @@ mergePartitions <- function(rdd, zip) {
part
}
}
-
+
PipelinedRDD(rdd, partitionFunc)
}
diff --git a/R/pkg/R/zzz.R b/R/pkg/R/zzz.R
index 80d796d467..301feade65 100644
--- a/R/pkg/R/zzz.R
+++ b/R/pkg/R/zzz.R
@@ -18,4 +18,3 @@
.onLoad <- function(libname, pkgname) {
sparkR.onLoad(libname, pkgname)
}
-
diff --git a/R/pkg/inst/tests/test_binaryFile.R b/R/pkg/inst/tests/test_binaryFile.R
index ca4218f381..4db7266abc 100644
--- a/R/pkg/inst/tests/test_binaryFile.R
+++ b/R/pkg/inst/tests/test_binaryFile.R
@@ -59,15 +59,15 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works",
wordCount <- lapply(words, function(word) { list(word, 1L) })
counts <- reduceByKey(wordCount, "+", 2L)
-
+
saveAsObjectFile(counts, fileName2)
counts <- objectFile(sc, fileName2)
-
+
output <- collect(counts)
expected <- list(list("awesome.", 1), list("Spark", 2), list("pretty.", 1),
list("is", 2))
expect_equal(sortKeyValueList(output), sortKeyValueList(expected))
-
+
unlink(fileName1)
unlink(fileName2, recursive = TRUE)
})
@@ -87,4 +87,3 @@ test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
unlink(fileName1, recursive = TRUE)
unlink(fileName2, recursive = TRUE)
})
-
diff --git a/R/pkg/inst/tests/test_binary_function.R b/R/pkg/inst/tests/test_binary_function.R
index 6785a7bdae..a1e354e567 100644
--- a/R/pkg/inst/tests/test_binary_function.R
+++ b/R/pkg/inst/tests/test_binary_function.R
@@ -30,7 +30,7 @@ mockFile <- c("Spark is pretty.", "Spark is awesome.")
test_that("union on two RDDs", {
actual <- collect(unionRDD(rdd, rdd))
expect_equal(actual, as.list(rep(nums, 2)))
-
+
fileName <- tempfile(pattern="spark-test", fileext=".tmp")
writeLines(mockFile, fileName)
@@ -52,14 +52,14 @@ test_that("union on two RDDs", {
test_that("cogroup on two RDDs", {
rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
- cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
+ cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
actual <- collect(cogroup.rdd)
- expect_equal(actual,
+ expect_equal(actual,
list(list(1, list(list(1), list(2, 3))), list(2, list(list(4), list()))))
-
+
rdd1 <- parallelize(sc, list(list("a", 1), list("a", 4)))
rdd2 <- parallelize(sc, list(list("b", 2), list("a", 3)))
- cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
+ cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
actual <- collect(cogroup.rdd)
expected <- list(list("b", list(list(), list(2))), list("a", list(list(1, 4), list(3))))
@@ -71,31 +71,31 @@ test_that("zipPartitions() on RDDs", {
rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2
rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4
rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6
- actual <- collect(zipPartitions(rdd1, rdd2, rdd3,
+ actual <- collect(zipPartitions(rdd1, rdd2, rdd3,
func = function(x, y, z) { list(list(x, y, z))} ))
expect_equal(actual,
list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))))
-
+
mockFile = c("Spark is pretty.", "Spark is awesome.")
fileName <- tempfile(pattern="spark-test", fileext=".tmp")
writeLines(mockFile, fileName)
-
+
rdd <- textFile(sc, fileName, 1)
- actual <- collect(zipPartitions(rdd, rdd,
+ actual <- collect(zipPartitions(rdd, rdd,
func = function(x, y) { list(paste(x, y, sep = "\n")) }))
expected <- list(paste(mockFile, mockFile, sep = "\n"))
expect_equal(actual, expected)
-
+
rdd1 <- parallelize(sc, 0:1, 1)
- actual <- collect(zipPartitions(rdd1, rdd,
+ actual <- collect(zipPartitions(rdd1, rdd,
func = function(x, y) { list(x + nchar(y)) }))
expected <- list(0:1 + nchar(mockFile))
expect_equal(actual, expected)
-
+
rdd <- map(rdd, function(x) { x })
- actual <- collect(zipPartitions(rdd, rdd1,
+ actual <- collect(zipPartitions(rdd, rdd1,
func = function(x, y) { list(y + nchar(x)) }))
expect_equal(actual, expected)
-
+
unlink(fileName)
})
diff --git a/R/pkg/inst/tests/test_rdd.R b/R/pkg/inst/tests/test_rdd.R
index 03207353c3..4fe6538567 100644
--- a/R/pkg/inst/tests/test_rdd.R
+++ b/R/pkg/inst/tests/test_rdd.R
@@ -477,7 +477,7 @@ test_that("cartesian() on RDDs", {
list(1, 1), list(1, 2), list(1, 3),
list(2, 1), list(2, 2), list(2, 3),
list(3, 1), list(3, 2), list(3, 3)))
-
+
# test case where one RDD is empty
emptyRdd <- parallelize(sc, list())
actual <- collect(cartesian(rdd, emptyRdd))
@@ -486,7 +486,7 @@ test_that("cartesian() on RDDs", {
mockFile = c("Spark is pretty.", "Spark is awesome.")
fileName <- tempfile(pattern="spark-test", fileext=".tmp")
writeLines(mockFile, fileName)
-
+
rdd <- textFile(sc, fileName)
actual <- collect(cartesian(rdd, rdd))
expected <- list(
@@ -495,7 +495,7 @@ test_that("cartesian() on RDDs", {
list("Spark is pretty.", "Spark is pretty."),
list("Spark is pretty.", "Spark is awesome."))
expect_equal(sortKeyValueList(actual), expected)
-
+
rdd1 <- parallelize(sc, 0:1)
actual <- collect(cartesian(rdd1, rdd))
expect_equal(sortKeyValueList(actual),
@@ -504,11 +504,11 @@ test_that("cartesian() on RDDs", {
list(0, "Spark is awesome."),
list(1, "Spark is pretty."),
list(1, "Spark is awesome.")))
-
+
rdd1 <- map(rdd, function(x) { x })
actual <- collect(cartesian(rdd, rdd1))
expect_equal(sortKeyValueList(actual), expected)
-
+
unlink(fileName)
})
@@ -760,7 +760,7 @@ test_that("collectAsMap() on a pairwise RDD", {
})
test_that("show()", {
- rdd <- parallelize(sc, list(1:10))
+ rdd <- parallelize(sc, list(1:10))
expect_output(show(rdd), "ParallelCollectionRDD\\[\\d+\\] at parallelize at RRDD\\.scala:\\d+")
})
diff --git a/R/pkg/inst/tests/test_shuffle.R b/R/pkg/inst/tests/test_shuffle.R
index d7dedda553..adf0b91d25 100644
--- a/R/pkg/inst/tests/test_shuffle.R
+++ b/R/pkg/inst/tests/test_shuffle.R
@@ -106,39 +106,39 @@ test_that("aggregateByKey", {
zeroValue <- list(0, 0)
seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
- aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
-
+ aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
+
actual <- collect(aggregatedRDD)
-
+
expected <- list(list(1, list(3, 2)), list(2, list(7, 2)))
expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
# test aggregateByKey for string keys
rdd <- parallelize(sc, list(list("a", 1), list("a", 2), list("b", 3), list("b", 4)))
-
+
zeroValue <- list(0, 0)
seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) }
combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) }
- aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
+ aggregatedRDD <- aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L)
actual <- collect(aggregatedRDD)
-
+
expected <- list(list("a", list(3, 2)), list("b", list(7, 2)))
expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
})
-test_that("foldByKey", {
+test_that("foldByKey", {
# test foldByKey for int keys
folded <- foldByKey(intRdd, 0, "+", 2L)
-
+
actual <- collect(folded)
-
+
expected <- list(list(2L, 101), list(1L, 199))
expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
# test foldByKey for double keys
folded <- foldByKey(doubleRdd, 0, "+", 2L)
-
+
actual <- collect(folded)
expected <- list(list(1.5, 199), list(2.5, 101))
@@ -146,15 +146,15 @@ test_that("foldByKey", {
# test foldByKey for string keys
stringKeyPairs <- list(list("a", -1), list("b", 100), list("b", 1), list("a", 200))
-
+
stringKeyRDD <- parallelize(sc, stringKeyPairs)
folded <- foldByKey(stringKeyRDD, 0, "+", 2L)
-
+
actual <- collect(folded)
-
+
expected <- list(list("b", 101), list("a", 199))
expect_equal(sortKeyValueList(actual), sortKeyValueList(expected))
-
+
# test foldByKey for empty pair RDD
rdd <- parallelize(sc, list())
folded <- foldByKey(rdd, 0, "+", 2L)
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 8946348ef8..fc7f3f074b 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -67,7 +67,7 @@ test_that("structType and structField", {
expect_true(inherits(testField, "structField"))
expect_true(testField$name() == "a")
expect_true(testField$nullable())
-
+
testSchema <- structType(testField, structField("b", "integer"))
expect_true(inherits(testSchema, "structType"))
expect_true(inherits(testSchema$fields()[[2]], "structField"))
@@ -598,7 +598,7 @@ test_that("column functions", {
c3 <- lower(c) + upper(c) + first(c) + last(c)
c4 <- approxCountDistinct(c) + countDistinct(c) + cast(c, "string")
c5 <- n(c) + n_distinct(c)
- c5 <- acos(c) + asin(c) + atan(c) + cbrt(c)
+ c5 <- acos(c) + asin(c) + atan(c) + cbrt(c)
c6 <- ceiling(c) + cos(c) + cosh(c) + exp(c) + expm1(c)
c7 <- floor(c) + log(c) + log10(c) + log1p(c) + rint(c)
c8 <- sign(c) + sin(c) + sinh(c) + tan(c) + tanh(c)
@@ -829,7 +829,7 @@ test_that("dropna() on a DataFrame", {
rows <- collect(df)
# drop with columns
-
+
expected <- rows[!is.na(rows$name),]
actual <- collect(dropna(df, cols = "name"))
expect_true(identical(expected, actual))
@@ -842,7 +842,7 @@ test_that("dropna() on a DataFrame", {
expect_true(identical(expected$age, actual$age))
expect_true(identical(expected$height, actual$height))
expect_true(identical(expected$name, actual$name))
-
+
expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
actual <- collect(dropna(df, cols = c("age", "height")))
expect_true(identical(expected, actual))
@@ -850,7 +850,7 @@ test_that("dropna() on a DataFrame", {
expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df))
expect_true(identical(expected, actual))
-
+
# drop with how
expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
@@ -860,7 +860,7 @@ test_that("dropna() on a DataFrame", {
expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),]
actual <- collect(dropna(df, "all"))
expect_true(identical(expected, actual))
-
+
expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df, "any"))
expect_true(identical(expected, actual))
@@ -872,14 +872,14 @@ test_that("dropna() on a DataFrame", {
expected <- rows[!is.na(rows$age) | !is.na(rows$height),]
actual <- collect(dropna(df, "all", cols = c("age", "height")))
expect_true(identical(expected, actual))
-
+
# drop with threshold
-
+
expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) >= 2,]
actual <- collect(dropna(df, minNonNulls = 2, cols = c("age", "height")))
- expect_true(identical(expected, actual))
+ expect_true(identical(expected, actual))
- expected <- rows[as.integer(!is.na(rows$age)) +
+ expected <- rows[as.integer(!is.na(rows$age)) +
as.integer(!is.na(rows$height)) +
as.integer(!is.na(rows$name)) >= 3,]
actual <- collect(dropna(df, minNonNulls = 3, cols = c("name", "age", "height")))
@@ -889,9 +889,9 @@ test_that("dropna() on a DataFrame", {
test_that("fillna() on a DataFrame", {
df <- jsonFile(sqlContext, jsonPathNa)
rows <- collect(df)
-
+
# fill with value
-
+
expected <- rows
expected$age[is.na(expected$age)] <- 50
expected$height[is.na(expected$height)] <- 50.6
@@ -912,7 +912,7 @@ test_that("fillna() on a DataFrame", {
expected$name[is.na(expected$name)] <- "unknown"
actual <- collect(fillna(df, "unknown", c("age", "name")))
expect_true(identical(expected, actual))
-
+
# fill with named list
expected <- rows
@@ -920,7 +920,7 @@ test_that("fillna() on a DataFrame", {
expected$height[is.na(expected$height)] <- 50.6
expected$name[is.na(expected$name)] <- "unknown"
actual <- collect(fillna(df, list("age" = 50, "height" = 50.6, "name" = "unknown")))
- expect_true(identical(expected, actual))
+ expect_true(identical(expected, actual))
})
unlink(parquetPath)
diff --git a/R/pkg/inst/tests/test_take.R b/R/pkg/inst/tests/test_take.R
index 7f4c7c315d..c5eb417b40 100644
--- a/R/pkg/inst/tests/test_take.R
+++ b/R/pkg/inst/tests/test_take.R
@@ -64,4 +64,3 @@ test_that("take() gives back the original elements in correct count and order",
expect_true(length(take(numListRDD, 0)) == 0)
expect_true(length(take(numVectorRDD, 0)) == 0)
})
-
diff --git a/R/pkg/inst/tests/test_textFile.R b/R/pkg/inst/tests/test_textFile.R
index 6b87b4b3e0..092ad9dc10 100644
--- a/R/pkg/inst/tests/test_textFile.R
+++ b/R/pkg/inst/tests/test_textFile.R
@@ -58,7 +58,7 @@ test_that("textFile() word count works as expected", {
expected <- list(list("pretty.", 1), list("is", 2), list("awesome.", 1),
list("Spark", 2))
expect_equal(sortKeyValueList(output), sortKeyValueList(expected))
-
+
unlink(fileName)
})
@@ -115,13 +115,13 @@ test_that("textFile() and saveAsTextFile() word count works as expected", {
saveAsTextFile(counts, fileName2)
rdd <- textFile(sc, fileName2)
-
+
output <- collect(rdd)
expected <- list(list("awesome.", 1), list("Spark", 2),
list("pretty.", 1), list("is", 2))
expectedStr <- lapply(expected, function(x) { toString(x) })
expect_equal(sortKeyValueList(output), sortKeyValueList(expectedStr))
-
+
unlink(fileName1)
unlink(fileName2)
})
@@ -159,4 +159,3 @@ test_that("Pipelined operations on RDDs created using textFile", {
unlink(fileName)
})
-
diff --git a/R/pkg/inst/tests/test_utils.R b/R/pkg/inst/tests/test_utils.R
index 539e3a3c19..15030e6f1d 100644
--- a/R/pkg/inst/tests/test_utils.R
+++ b/R/pkg/inst/tests/test_utils.R
@@ -43,13 +43,13 @@ test_that("serializeToBytes on RDD", {
mockFile <- c("Spark is pretty.", "Spark is awesome.")
fileName <- tempfile(pattern="spark-test", fileext=".tmp")
writeLines(mockFile, fileName)
-
+
text.rdd <- textFile(sc, fileName)
expect_true(getSerializedMode(text.rdd) == "string")
ser.rdd <- serializeToBytes(text.rdd)
expect_equal(collect(ser.rdd), as.list(mockFile))
expect_true(getSerializedMode(ser.rdd) == "byte")
-
+
unlink(fileName)
})
@@ -64,7 +64,7 @@ test_that("cleanClosure on R functions", {
expect_equal(actual, y)
actual <- get("g", envir = env, inherits = FALSE)
expect_equal(actual, g)
-
+
# Test for nested enclosures and package variables.
env2 <- new.env()
funcEnv <- new.env(parent = env2)
@@ -106,7 +106,7 @@ test_that("cleanClosure on R functions", {
expect_equal(length(ls(env)), 1)
actual <- get("y", envir = env, inherits = FALSE)
expect_equal(actual, y)
-
+
# Test for function (and variable) definitions.
f <- function(x) {
g <- function(y) { y * 2 }
@@ -115,7 +115,7 @@ test_that("cleanClosure on R functions", {
newF <- cleanClosure(f)
env <- environment(newF)
expect_equal(length(ls(env)), 0) # "y" and "g" should not be included.
-
+
# Test for overriding variables in base namespace (Issue: SparkR-196).
nums <- as.list(1:10)
rdd <- parallelize(sc, nums, 2L)
@@ -128,7 +128,7 @@ test_that("cleanClosure on R functions", {
actual <- collect(lapply(rdd, f))
expected <- as.list(c(rep(FALSE, 4), rep(TRUE, 6)))
expect_equal(actual, expected)
-
+
# Test for broadcast variables.
a <- matrix(nrow=10, ncol=10, data=rnorm(100))
aBroadcast <- broadcast(sc, a)