aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/R/DataFrame.R
diff options
context:
space:
mode:
authorHossein <hossein@databricks.com>2015-07-31 14:07:41 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-07-31 14:08:18 -0700
commit710c2b5dd2dc6b8d947303ad8dfae4539b63fe11 (patch)
treea7f581717a60841fbbf621306acaf2e4df11b0a2 /R/pkg/R/DataFrame.R
parent82f47b811607a1eeeecba437fe0ffc15d4e5f9ec (diff)
downloadspark-710c2b5dd2dc6b8d947303ad8dfae4539b63fe11.tar.gz
spark-710c2b5dd2dc6b8d947303ad8dfae4539b63fe11.tar.bz2
spark-710c2b5dd2dc6b8d947303ad8dfae4539b63fe11.zip
[SPARK-9324] [SPARK-9322] [SPARK-9321] [SPARKR] Some aliases for R-like functions in DataFrames
Adds following aliases: * unique (distinct) * rbind (unionAll): accepts many DataFrames * nrow (count) * ncol * dim * names (columns): along with the replacement function to change names Author: Hossein <hossein@databricks.com> Closes #7764 from falaki/sparkR-alias and squashes the following commits: 56016f5 [Hossein] Updated R documentation 5e4a4d0 [Hossein] Removed extra code f51cbef [Hossein] Merge branch 'master' into sparkR-alias c1b88bd [Hossein] Moved setGeneric and other comments applied d9307f8 [Hossein] Added tests b5aa988 [Hossein] Added dim, ncol, nrow, names, rbind, and unique functions to DataFrames
Diffstat (limited to 'R/pkg/R/DataFrame.R')
-rw-r--r--R/pkg/R/DataFrame.R90
1 files changed, 90 insertions, 0 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b31ad3729e..b4065d2944 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -255,6 +255,16 @@ setMethod("names",
columns(x)
})
+#' @rdname columns
+setMethod("names<-",
+          signature(x = "DataFrame"),
+          function(x, value) {
+            # Previously a NULL `value` fell through the if() and the method
+            # returned NULL invisibly, so `names(df) <- NULL` silently
+            # replaced the DataFrame with NULL. Fail fast instead.
+            if (is.null(value)) {
+              stop("Invalid column names.")
+            }
+            # Rename by projecting onto the same columns under the new names.
+            sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
+            dataFrame(sdf)
+          })
+
#' Register Temporary Table
#'
#' Registers a DataFrame as a Temporary Table in the SQLContext
@@ -473,6 +483,18 @@ setMethod("distinct",
dataFrame(sdf)
})
+#' @title Distinct rows in a DataFrame
+#'
+#' @description Returns a new DataFrame containing distinct rows in this DataFrame
+#'
+#' @rdname unique
+#' @aliases unique
+setMethod("unique",
+          signature(x = "DataFrame"),
+          function(x) {
+            # Base-R-style alias: delegates directly to distinct().
+            distinct(x)
+          })
+
#' Sample
#'
#' Return a sampled subset of this DataFrame using a random seed.
@@ -534,6 +556,58 @@ setMethod("count",
callJMethod(x@sdf, "count")
})
+#' @title Number of rows for a DataFrame
+#' @description Returns the number of rows in a DataFrame
+#'
+#' @name nrow
+#'
+#' @rdname nrow
+#' @aliases count
+setMethod("nrow",
+          signature(x = "DataFrame"),
+          function(x) {
+            # Base-R-style alias: delegates to count(), which triggers a
+            # distributed count job on the JVM side.
+            count(x)
+          })
+
+#' Returns the number of columns in a DataFrame
+#'
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname ncol
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' ncol(df)
+#' }
+setMethod("ncol",
+          signature(x = "DataFrame"),
+          function(x) {
+            # The column count is simply how many names the schema reports.
+            col_names <- columns(x)
+            length(col_names)
+          })
+
+#' Returns the dimensions (number of rows and columns) of a DataFrame
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname dim
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' dim(df)
+#' }
+setMethod("dim",
+          signature(x = "DataFrame"),
+          function(x) {
+            # Mirror base::dim(): c(rows, cols). Use the nrow() alias for
+            # consistency with ncol() instead of mixing count() with ncol().
+            c(nrow(x), ncol(x))
+          })
+
#' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame.
#'
#' @param x A SparkSQL DataFrame
@@ -1231,6 +1305,22 @@ setMethod("unionAll",
dataFrame(unioned)
})
+#' @title Union two or more DataFrames
+#'
+#' @description Returns a new DataFrame containing rows of all parameters.
+#'
+#' @rdname rbind
+#' @aliases unionAll
+setMethod("rbind",
+          signature(... = "DataFrame"),
+          function(x, ..., deparse.level = 1) {
+            # NOTE(review): nargs() == 3 is taken to mean exactly two
+            # DataFrames were supplied (x, one element of ..., plus the
+            # deparse.level argument as seen inside the S4 method body) --
+            # confirm this argument count holds across R versions, as
+            # nargs() inside dispatched methods is fragile.
+            if (nargs() == 3) {
+              unionAll(x, ...)
+            } else {
+              # More than two inputs: peel off x and recurse on the rest,
+              # folding the remaining DataFrames via Recall().
+              unionAll(x, Recall(..., deparse.level = 1))
+            }
+          })
+
#' Intersect
#'
#' Return a new DataFrame containing rows only in both this DataFrame