aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/R/DataFrame.R
diff options
context:
space:
mode:
Diffstat (limited to 'R/pkg/R/DataFrame.R')
-rw-r--r--R/pkg/R/DataFrame.R73
1 files changed, 73 insertions, 0 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 3bf5bc924f..35695b9df1 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2299,3 +2299,76 @@ setMethod("with",
newEnv <- assignNewEnv(data)
eval(substitute(expr), envir = newEnv, enclos = newEnv)
})
+
+#' Display the structure of a DataFrame, including column names, column types, as well as
+#' a small sample of rows.
+#' @name str
+#' @title Compactly display the structure of a dataset
+#' @rdname str
+#' @family DataFrame functions
+#' @param object a DataFrame
+#' @return Called for its side effect of printing the structure; the value is not meaningful.
+#' @examples \dontrun{
+#' # Create a DataFrame from the Iris dataset
+#' irisDF <- createDataFrame(sqlContext, iris)
+#'
+#' # Show the structure of the DataFrame
+#' str(irisDF)
+#' }
+setMethod("str",
+          signature(object = "DataFrame"),
+          function(object) {
+
+            # TODO: These could be made global parameters, though in R it's not the case
+            # Maximum number of characters printed for each column line
+            MAX_CHAR_PER_ROW <- 120
+            # Maximum number of columns fetched and displayed
+            MAX_COLS <- 100
+
+            # Get the column names and types of the DataFrame
+            names <- names(object)
+            types <- coltypes(object)
+
+            # Get the first elements of the dataset. Limit number of columns accordingly
+            localDF <- if (ncol(object) > MAX_COLS) {
+              head(object[, seq_len(MAX_COLS)])
+            } else {
+              head(object)
+            }
+
+            # The number of observations will not be displayed as computing the
+            # number of rows is a very expensive operation
+            cat(paste0("'", class(object), "': ", length(names), " variables:\n"))
+
+            if (nrow(localDF) > 0) {
+              # Width of the longest column name. It does not depend on the column being
+              # printed, so compute it once; the 0L guards against an empty name vector.
+              maxNameWidth <- max(nchar(names), 0L)
+
+              # seq_len() yields an empty sequence for zero columns, whereas 1:0 would
+              # iterate over c(1, 0)
+              for (i in seq_len(ncol(localDF))) {
+                # Get the first elements for each column; strings are wrapped in quotes
+                firstElements <- if (types[i] == "character") {
+                  paste(paste0("\"", localDF[, i], "\""), collapse = " ")
+                } else {
+                  paste(localDF[, i], collapse = " ")
+                }
+
+                # Add the corresponding number of spaces for alignment
+                spaces <- paste(rep(" ", maxNameWidth - nchar(names[i])), collapse = "")
+
+                # Get the short type. For 'character', it would be 'chr';
+                # for 'numeric', it's 'num', etc. Types without an entry in
+                # SHORT_TYPES fall back to their first three characters.
+                dataType <- SHORT_TYPES[[types[i]]]
+                if (is.null(dataType)) {
+                  dataType <- substring(types[i], 1, 3)
+                }
+
+                # Concatenate the colnames, coltypes, and first
+                # elements of each column
+                line <- paste0(" $ ", names[i], spaces, ": ",
+                               dataType, " ", firstElements)
+
+                # Chop off extra characters if this is too long
+                cat(substr(line, 1, MAX_CHAR_PER_ROW))
+                cat("\n")
+              }
+
+              # Tell the user when columns beyond MAX_COLS were left out
+              if (ncol(localDF) < ncol(object)) {
+                cat(paste0("\nDisplaying first ", ncol(localDF), " columns only."))
+              }
+            }
+          })
\ No newline at end of file