author     felixcheung <felixcheung_m@hotmail.com>   2016-04-23 00:20:27 -0700
committer  Reynold Xin <rxin@databricks.com>         2016-04-23 00:20:27 -0700
commit     a55fbe2a16aa0866ff8aca25bf9f772e6eb516a1 (patch)
tree       3c29aa4d17cad1c88f6eb989cae5a207077de689 /R
parent     86ca8fefc8a147b31952b8a00e58e46d93bb8bc4 (diff)
[SPARK-12148][SPARKR] SparkR: rename DataFrame to SparkDataFrame
## What changes were proposed in this pull request?

Changed the class name defined in R from "DataFrame" to "SparkDataFrame". A popular package, S4Vectors, already defines a "DataFrame" class; this change avoids that conflict. Aside from the class name and the API/roxygen2 references to it, SparkR APIs such as `createDataFrame` and `as.DataFrame` are unchanged (S4Vectors does not define an "as.DataFrame"). Since one rarely references a type/class directly in R, this change should have minimal to no impact on SparkR users in terms of backward compatibility.

## How was this patch tested?

SparkR tests, plus manually loading the S4Vectors package followed by SparkR.

Author: felixcheung <felixcheung_m@hotmail.com>

Closes #12621 from felixcheung/rdataframe.
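To make the user-facing effect concrete, here is a minimal SparkR sketch (an illustration, not part of the patch; it assumes a local Spark installation and the pre-2.0 `sparkR.init`/`sparkRSQL.init` entry points used in the roxygen examples below). Construction APIs such as `createDataFrame` are untouched; only code that inspects the S4 class name observes the rename.

library(SparkR)

sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)

# Construction APIs are unchanged by this patch
df <- createDataFrame(sqlContext, faithful)

# Only explicit class references see the new class name
class(df)                 # "SparkDataFrame" (was "DataFrame")
is(df, "SparkDataFrame")  # TRUE

# The updated show() method also prints the class name, e.g.
df                        # SparkDataFrame[eruptions:double, waiting:double]

# Loading S4Vectors alongside SparkR no longer clashes on a "DataFrame" class
# library(S4Vectors)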
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/NAMESPACE                              2
-rw-r--r--  R/pkg/R/DataFrame.R                        653
-rw-r--r--  R/pkg/R/RDD.R                                4
-rw-r--r--  R/pkg/R/SQLContext.R                        70
-rw-r--r--  R/pkg/R/column.R                             6
-rw-r--r--  R/pkg/R/deserialize.R                        2
-rw-r--r--  R/pkg/R/functions.R                          2
-rw-r--r--  R/pkg/R/generics.R                           2
-rw-r--r--  R/pkg/R/group.R                             14
-rw-r--r--  R/pkg/R/mllib.R                             38
-rw-r--r--  R/pkg/R/schema.R                             6
-rw-r--r--  R/pkg/R/stats.R                             38
-rw-r--r--  R/pkg/R/utils.R                              2
-rw-r--r--  R/pkg/inst/tests/testthat/test_sparkSQL.R  102
14 files changed, 473 insertions, 468 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index b3aff10b7d..0f92b5e597 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -27,7 +27,7 @@ export("setJobGroup",
# Export Utility methods
export("setLogLevel")
-exportClasses("DataFrame")
+exportClasses("SparkDataFrame")
exportMethods("arrange",
"as.data.frame",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 95e2eb2be0..69feec735c 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -15,21 +15,21 @@
# limitations under the License.
#
-# DataFrame.R - DataFrame class and methods implemented in S4 OO classes
+# DataFrame.R - SparkDataFrame class and methods implemented in S4 OO classes
#' @include generics.R jobj.R schema.R RDD.R pairRDD.R column.R group.R
NULL
setOldClass("jobj")
-#' @title S4 class that represents a DataFrame
+#' @title S4 class that represents a SparkDataFrame
#' @description DataFrames can be created using functions like \link{createDataFrame},
#' \link{read.json}, \link{table} etc.
-#' @family DataFrame functions
-#' @rdname DataFrame
+#' @family SparkSparkDataFrame functions
+#' @rdname SparkDataFrame
#' @docType class
#'
-#' @slot env An R environment that stores bookkeeping states of the DataFrame
+#' @slot env An R environment that stores bookkeeping states of the SparkDataFrame
#' @slot sdf A Java object reference to the backing Scala DataFrame
#' @seealso \link{createDataFrame}, \link{read.json}, \link{table}
#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes}
@@ -40,11 +40,11 @@ setOldClass("jobj")
#' sqlContext <- sparkRSQL.init(sc)
#' df <- createDataFrame(sqlContext, faithful)
#'}
-setClass("DataFrame",
+setClass("SparkDataFrame",
slots = list(env = "environment",
sdf = "jobj"))
-setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
+setMethod("initialize", "SparkDataFrame", function(.Object, sdf, isCached) {
.Object@env <- new.env()
.Object@env$isCached <- isCached
@@ -52,23 +52,23 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
.Object
})
-#' @rdname DataFrame
+#' @rdname SparkDataFrame
#' @export
#' @param sdf A Java object reference to the backing Scala DataFrame
-#' @param isCached TRUE if the dataFrame is cached
+#' @param isCached TRUE if the SparkDataFrame is cached
dataFrame <- function(sdf, isCached = FALSE) {
- new("DataFrame", sdf, isCached)
+ new("SparkDataFrame", sdf, isCached)
}
-############################ DataFrame Methods ##############################################
+############################ SparkDataFrame Methods ##############################################
-#' Print Schema of a DataFrame
+#' Print Schema of a SparkDataFrame
#'
#' Prints out the schema in tree format
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkSparkDataFrame functions
#' @rdname printSchema
#' @name printSchema
#' @export
@@ -81,7 +81,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
#' printSchema(df)
#'}
setMethod("printSchema",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
schemaString <- callJMethod(schema(x)$jobj, "treeString")
cat(schemaString)
@@ -89,11 +89,11 @@ setMethod("printSchema",
#' Get schema object
#'
-#' Returns the schema of this DataFrame as a structType object.
+#' Returns the schema of this SparkDataFrame as a structType object.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkSparkDataFrame functions
#' @rdname schema
#' @name schema
#' @export
@@ -106,7 +106,7 @@ setMethod("printSchema",
#' dfSchema <- schema(df)
#'}
setMethod("schema",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
structType(callJMethod(x@sdf, "schema"))
})
@@ -115,9 +115,9 @@ setMethod("schema",
#'
#' Print the logical and physical Catalyst plans to the console for debugging.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param extended Logical. If extended is False, explain() only prints the physical plan.
-#' @family DataFrame functions
+#' @family SparkSparkDataFrame functions
#' @rdname explain
#' @name explain
#' @export
@@ -130,7 +130,7 @@ setMethod("schema",
#' explain(df, TRUE)
#'}
setMethod("explain",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, extended = FALSE) {
queryExec <- callJMethod(x@sdf, "queryExecution")
if (extended) {
@@ -146,9 +146,9 @@ setMethod("explain",
#' Returns True if the `collect` and `take` methods can be run locally
#' (without any Spark executors).
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkSparkDataFrame functions
#' @rdname isLocal
#' @name isLocal
#' @export
@@ -161,19 +161,19 @@ setMethod("explain",
#' isLocal(df)
#'}
setMethod("isLocal",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
callJMethod(x@sdf, "isLocal")
})
#' showDF
#'
-#' Print the first numRows rows of a DataFrame
+#' Print the first numRows rows of a SparkDataFrame
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param numRows The number of rows to print. Defaults to 20.
#'
-#' @family DataFrame functions
+#' @family SparkSparkDataFrame functions
#' @rdname showDF
#' @name showDF
#' @export
@@ -186,7 +186,7 @@ setMethod("isLocal",
#' showDF(df)
#'}
setMethod("showDF",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, numRows = 20, truncate = TRUE) {
s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
cat(s)
@@ -194,11 +194,11 @@ setMethod("showDF",
#' show
#'
-#' Print the DataFrame column names and types
+#' Print the SparkDataFrame column names and types
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkSparkDataFrame functions
#' @rdname show
#' @name show
#' @export
@@ -210,22 +210,22 @@ setMethod("showDF",
#' df <- read.json(sqlContext, path)
#' df
#'}
-setMethod("show", "DataFrame",
+setMethod("show", "SparkDataFrame",
function(object) {
cols <- lapply(dtypes(object), function(l) {
paste(l, collapse = ":")
})
s <- paste(cols, collapse = ", ")
- cat(paste("DataFrame[", s, "]\n", sep = ""))
+ cat(paste(class(object), "[", s, "]\n", sep = ""))
})
#' DataTypes
#'
#' Return all column names and their data types as a list
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkSparkDataFrame functions
#' @rdname dtypes
#' @name dtypes
#' @export
@@ -238,7 +238,7 @@ setMethod("show", "DataFrame",
#' dtypes(df)
#'}
setMethod("dtypes",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
lapply(schema(x)$fields(), function(f) {
c(f$name(), f$dataType.simpleString())
@@ -249,9 +249,9 @@ setMethod("dtypes",
#'
#' Return all column names as a list
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname columns
#' @name columns
@@ -266,7 +266,7 @@ setMethod("dtypes",
#' colnames(df)
#'}
setMethod("columns",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
sapply(schema(x)$fields(), function(f) {
f$name()
@@ -276,7 +276,7 @@ setMethod("columns",
#' @rdname columns
#' @name names
setMethod("names",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
columns(x)
})
@@ -284,7 +284,7 @@ setMethod("names",
#' @rdname columns
#' @name names<-
setMethod("names<-",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, value) {
if (!is.null(value)) {
sdf <- callJMethod(x@sdf, "toDF", as.list(value))
@@ -295,7 +295,7 @@ setMethod("names<-",
#' @rdname columns
#' @name colnames
setMethod("colnames",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
columns(x)
})
@@ -303,7 +303,7 @@ setMethod("colnames",
#' @rdname columns
#' @name colnames<-
setMethod("colnames<-",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, value) {
# Check parameter integrity
@@ -331,13 +331,13 @@ setMethod("colnames<-",
#' coltypes
#'
-#' Get column types of a DataFrame
+#' Get column types of a SparkDataFrame
#'
-#' @param x A SparkSQL DataFrame
-#' @return value A character vector with the column types of the given DataFrame
+#' @param x A SparkDataFrame
+#' @return value A character vector with the column types of the given SparkDataFrame
#' @rdname coltypes
#' @name coltypes
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @export
#' @examples
#'\dontrun{
@@ -345,9 +345,9 @@ setMethod("colnames<-",
#' coltypes(irisDF)
#'}
setMethod("coltypes",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
- # Get the data types of the DataFrame by invoking dtypes() function
+ # Get the data types of the SparkDataFrame by invoking dtypes() function
types <- sapply(dtypes(x), function(x) {x[[2]]})
# Map Spark data types into R's data types using DATA_TYPES environment
@@ -382,11 +382,11 @@ setMethod("coltypes",
#' coltypes
#'
-#' Set the column types of a DataFrame.
+#' Set the column types of a SparkDataFrame.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param value A character vector with the target column types for the given
-#' DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA
+#' SparkDataFrame. Column types can be one of integer, numeric/double, character, logical, or NA
#' to keep that column as-is.
#' @rdname coltypes
#' @name coltypes<-
@@ -401,15 +401,15 @@ setMethod("coltypes",
#' coltypes(df) <- c(NA, "numeric")
#'}
setMethod("coltypes<-",
- signature(x = "DataFrame", value = "character"),
+ signature(x = "SparkDataFrame", value = "character"),
function(x, value) {
cols <- columns(x)
ncols <- length(cols)
if (length(value) == 0) {
- stop("Cannot set types of an empty DataFrame with no Column")
+ stop("Cannot set types of an empty SparkDataFrame with no Column")
}
if (length(value) != ncols) {
- stop("Length of type vector should match the number of columns for DataFrame")
+ stop("Length of type vector should match the number of columns for SparkDataFrame")
}
newCols <- lapply(seq_len(ncols), function(i) {
col <- getColumn(x, cols[i])
@@ -429,12 +429,12 @@ setMethod("coltypes<-",
#' Register Temporary Table
#'
-#' Registers a DataFrame as a Temporary Table in the SQLContext
+#' Registers a SparkDataFrame as a Temporary Table in the SQLContext
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param tableName A character vector containing the name of the table
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname registerTempTable
#' @name registerTempTable
#' @export
@@ -448,21 +448,21 @@ setMethod("coltypes<-",
#' new_df <- sql(sqlContext, "SELECT * FROM json_df")
#'}
setMethod("registerTempTable",
- signature(x = "DataFrame", tableName = "character"),
+ signature(x = "SparkDataFrame", tableName = "character"),
function(x, tableName) {
invisible(callJMethod(x@sdf, "registerTempTable", tableName))
})
#' insertInto
#'
-#' Insert the contents of a DataFrame into a table registered in the current SQL Context.
+#' Insert the contents of a SparkDataFrame into a table registered in the current SQL Context.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param tableName A character vector containing the name of the table
#' @param overwrite A logical argument indicating whether or not to overwrite
#' the existing rows in the table.
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname insertInto
#' @name insertInto
#' @export
@@ -476,7 +476,7 @@ setMethod("registerTempTable",
#' insertInto(df2, "table1", overwrite = TRUE)
#'}
setMethod("insertInto",
- signature(x = "DataFrame", tableName = "character"),
+ signature(x = "SparkDataFrame", tableName = "character"),
function(x, tableName, overwrite = FALSE) {
jmode <- convertToJSaveMode(ifelse(overwrite, "overwrite", "append"))
write <- callJMethod(x@sdf, "write")
@@ -488,9 +488,9 @@ setMethod("insertInto",
#'
#' Persist with the default storage level (MEMORY_ONLY).
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname cache
#' @name cache
#' @export
@@ -503,7 +503,7 @@ setMethod("insertInto",
#' cache(df)
#'}
setMethod("cache",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
cached <- callJMethod(x@sdf, "cache")
x@env$isCached <- TRUE
@@ -512,13 +512,13 @@ setMethod("cache",
#' Persist
#'
-#' Persist this DataFrame with the specified storage level. For details of the
+#' Persist this SparkDataFrame with the specified storage level. For details of the
#' supported storage levels, refer to
#' \url{http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence}.
#'
-#' @param x The DataFrame to persist
+#' @param x The SparkDataFrame to persist
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname persist
#' @name persist
#' @export
@@ -531,7 +531,7 @@ setMethod("cache",
#' persist(df, "MEMORY_AND_DISK")
#'}
setMethod("persist",
- signature(x = "DataFrame", newLevel = "character"),
+ signature(x = "SparkDataFrame", newLevel = "character"),
function(x, newLevel) {
callJMethod(x@sdf, "persist", getStorageLevel(newLevel))
x@env$isCached <- TRUE
@@ -540,13 +540,13 @@ setMethod("persist",
#' Unpersist
#'
-#' Mark this DataFrame as non-persistent, and remove all blocks for it from memory and
+#' Mark this SparkDataFrame as non-persistent, and remove all blocks for it from memory and
#' disk.
#'
-#' @param x The DataFrame to unpersist
+#' @param x The SparkDataFrame to unpersist
#' @param blocking Whether to block until all blocks are deleted
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname unpersist-methods
#' @name unpersist
#' @export
@@ -560,7 +560,7 @@ setMethod("persist",
#' unpersist(df)
#'}
setMethod("unpersist",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, blocking = TRUE) {
callJMethod(x@sdf, "unpersist", blocking)
x@env$isCached <- FALSE
@@ -569,12 +569,12 @@ setMethod("unpersist",
#' Repartition
#'
-#' Return a new DataFrame that has exactly numPartitions partitions.
+#' Return a new SparkDataFrame that has exactly numPartitions partitions.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param numPartitions The number of partitions to use.
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname repartition
#' @name repartition
#' @export
@@ -587,7 +587,7 @@ setMethod("unpersist",
#' newDF <- repartition(df, 2L)
#'}
setMethod("repartition",
- signature(x = "DataFrame", numPartitions = "numeric"),
+ signature(x = "SparkDataFrame", numPartitions = "numeric"),
function(x, numPartitions) {
sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
dataFrame(sdf)
@@ -595,12 +595,12 @@ setMethod("repartition",
#' toJSON
#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
+#' Convert the rows of a SparkDataFrame into JSON objects and return an RDD where
#' each element contains a JSON string.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @return A StringRRDD of JSON objects
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname tojson
#' @noRd
#' @examples
@@ -612,7 +612,7 @@ setMethod("repartition",
#' newRDD <- toJSON(df)
#'}
setMethod("toJSON",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
rdd <- callJMethod(x@sdf, "toJSON")
jrdd <- callJMethod(rdd, "toJavaRDD")
@@ -621,13 +621,13 @@ setMethod("toJSON",
#' write.json
#'
-#' Save the contents of a DataFrame as a JSON file (one object per line). Files written out
-#' with this method can be read back in as a DataFrame using read.json().
+#' Save the contents of a SparkDataFrame as a JSON file (one object per line). Files written out
+#' with this method can be read back in as a SparkDataFrame using read.json().
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param path The directory where the file is saved
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname write.json
#' @name write.json
#' @export
@@ -640,7 +640,7 @@ setMethod("toJSON",
#' write.json(df, "/tmp/sparkr-tmp/")
#'}
setMethod("write.json",
- signature(x = "DataFrame", path = "character"),
+ signature(x = "SparkDataFrame", path = "character"),
function(x, path) {
write <- callJMethod(x@sdf, "write")
invisible(callJMethod(write, "json", path))
@@ -648,13 +648,13 @@ setMethod("write.json",
#' write.parquet
#'
-#' Save the contents of a DataFrame as a Parquet file, preserving the schema. Files written out
-#' with this method can be read back in as a DataFrame using read.parquet().
+#' Save the contents of a SparkDataFrame as a Parquet file, preserving the schema. Files written out
+#' with this method can be read back in as a SparkDataFrame using read.parquet().
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param path The directory where the file is saved
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname write.parquet
#' @name write.parquet
#' @export
@@ -668,7 +668,7 @@ setMethod("write.json",
#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/")
#'}
setMethod("write.parquet",
- signature(x = "DataFrame", path = "character"),
+ signature(x = "SparkDataFrame", path = "character"),
function(x, path) {
write <- callJMethod(x@sdf, "write")
invisible(callJMethod(write, "parquet", path))
@@ -678,7 +678,7 @@ setMethod("write.parquet",
#' @name saveAsParquetFile
#' @export
setMethod("saveAsParquetFile",
- signature(x = "DataFrame", path = "character"),
+ signature(x = "SparkDataFrame", path = "character"),
function(x, path) {
.Deprecated("write.parquet")
write.parquet(x, path)
@@ -686,14 +686,14 @@ setMethod("saveAsParquetFile",
#' write.text
#'
-#' Saves the content of the DataFrame in a text file at the specified path.
-#' The DataFrame must have only one column of string type with the name "value".
+#' Saves the content of the SparkDataFrame in a text file at the specified path.
+#' The SparkDataFrame must have only one column of string type with the name "value".
#' Each row becomes a new line in the output file.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param path The directory where the file is saved
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname write.text
#' @name write.text
#' @export
@@ -706,7 +706,7 @@ setMethod("saveAsParquetFile",
#' write.text(df, "/tmp/sparkr-tmp/")
#'}
setMethod("write.text",
- signature(x = "DataFrame", path = "character"),
+ signature(x = "SparkDataFrame", path = "character"),
function(x, path) {
write <- callJMethod(x@sdf, "write")
invisible(callJMethod(write, "text", path))
@@ -714,11 +714,11 @@ setMethod("write.text",
#' Distinct
#'
-#' Return a new DataFrame containing the distinct rows in this DataFrame.
+#' Return a new SparkDataFrame containing the distinct rows in this SparkDataFrame.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname distinct
#' @name distinct
#' @export
@@ -731,7 +731,7 @@ setMethod("write.text",
#' distinctDF <- distinct(df)
#'}
setMethod("distinct",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
sdf <- callJMethod(x@sdf, "distinct")
dataFrame(sdf)
@@ -740,21 +740,21 @@ setMethod("distinct",
#' @rdname distinct
#' @name unique
setMethod("unique",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
distinct(x)
})
#' Sample
#'
-#' Return a sampled subset of this DataFrame using a random seed.
+#' Return a sampled subset of this SparkDataFrame using a random seed.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param withReplacement Sampling with replacement or not
#' @param fraction The (rough) sample target fraction
#' @param seed Randomness seed value
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname sample
#' @name sample
#' @export
@@ -768,7 +768,7 @@ setMethod("unique",
#' collect(sample(df, TRUE, 0.5))
#'}
setMethod("sample",
- signature(x = "DataFrame", withReplacement = "logical",
+ signature(x = "SparkDataFrame", withReplacement = "logical",
fraction = "numeric"),
function(x, withReplacement, fraction, seed) {
if (fraction < 0.0) stop(cat("Negative fraction value:", fraction))
@@ -785,7 +785,7 @@ setMethod("sample",
#' @rdname sample
#' @name sample_frac
setMethod("sample_frac",
- signature(x = "DataFrame", withReplacement = "logical",
+ signature(x = "SparkDataFrame", withReplacement = "logical",
fraction = "numeric"),
function(x, withReplacement, fraction, seed) {
sample(x, withReplacement, fraction, seed)
@@ -793,11 +793,11 @@ setMethod("sample_frac",
#' nrow
#'
-#' Returns the number of rows in a DataFrame
+#' Returns the number of rows in a SparkDataFrame
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname nrow
#' @name count
#' @export
@@ -810,7 +810,7 @@ setMethod("sample_frac",
#' count(df)
#' }
setMethod("count",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
callJMethod(x@sdf, "count")
})
@@ -818,16 +818,16 @@ setMethod("count",
#' @name nrow
#' @rdname nrow
setMethod("nrow",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
count(x)
})
-#' Returns the number of columns in a DataFrame
+#' Returns the number of columns in a SparkDataFrame
#'
-#' @param x a SparkSQL DataFrame
+#' @param x a SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname ncol
#' @name ncol
#' @export
@@ -840,15 +840,15 @@ setMethod("nrow",
#' ncol(df)
#' }
setMethod("ncol",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
length(columns(x))
})
-#' Returns the dimentions (number of rows and columns) of a DataFrame
-#' @param x a SparkSQL DataFrame
+#' Returns the dimentions (number of rows and columns) of a SparkDataFrame
+#' @param x a SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname dim
#' @name dim
#' @export
@@ -861,18 +861,18 @@ setMethod("ncol",
#' dim(df)
#' }
setMethod("dim",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
c(count(x), ncol(x))
})
-#' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame.
+#' Collects all the elements of a SparkDataFrame and coerces them into an R data.frame.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param stringsAsFactors (Optional) A logical indicating whether or not string columns
#' should be converted to factors. FALSE by default.
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname collect
#' @name collect
#' @export
@@ -886,7 +886,7 @@ setMethod("dim",
#' firstName <- collected[[1]]$name
#' }
setMethod("collect",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, stringsAsFactors = FALSE) {
dtypes <- dtypes(x)
ncol <- length(dtypes)
@@ -938,13 +938,13 @@ setMethod("collect",
#' Limit
#'
-#' Limit the resulting DataFrame to the number of rows specified.
+#' Limit the resulting SparkDataFrame to the number of rows specified.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param num The number of rows to return
-#' @return A new DataFrame containing the number of rows specified.
+#' @return A new SparkDataFrame containing the number of rows specified.
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname limit
#' @name limit
#' @export
@@ -957,15 +957,15 @@ setMethod("collect",
#' limitedDF <- limit(df, 10)
#' }
setMethod("limit",
- signature(x = "DataFrame", num = "numeric"),
+ signature(x = "SparkDataFrame", num = "numeric"),
function(x, num) {
res <- callJMethod(x@sdf, "limit", as.integer(num))
dataFrame(res)
})
-#' Take the first NUM rows of a DataFrame and return a the results as a data.frame
+#' Take the first NUM rows of a SparkDataFrame and return a the results as a data.frame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname take
#' @name take
#' @export
@@ -978,7 +978,7 @@ setMethod("limit",
#' take(df, 2)
#' }
setMethod("take",
- signature(x = "DataFrame", num = "numeric"),
+ signature(x = "SparkDataFrame", num = "numeric"),
function(x, num) {
limited <- limit(x, num)
collect(limited)
@@ -986,15 +986,15 @@ setMethod("take",
#' Head
#'
-#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
+#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is NULL,
#' then head() returns the first 6 rows in keeping with the current data.frame
#' convention in R.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param num The number of rows to return. Default is 6.
#' @return A data.frame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname head
#' @name head
#' @export
@@ -1007,17 +1007,17 @@ setMethod("take",
#' head(df)
#' }
setMethod("head",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, num = 6L) {
# Default num is 6L in keeping with R's data.frame convention
take(x, num)
})
-#' Return the first row of a DataFrame
+#' Return the first row of a SparkDataFrame
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname first
#' @name first
#' @export
@@ -1030,16 +1030,16 @@ setMethod("head",
#' first(df)
#' }
setMethod("first",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
take(x, 1)
})
#' toRDD
#'
-#' Converts a Spark DataFrame to an RDD while preserving column names.
+#' Converts a SparkDataFrame to an RDD while preserving column names.
#'
-#' @param x A Spark DataFrame
+#' @param x A SparkDataFrame
#'
#' @noRd
#' @examples
@@ -1051,7 +1051,7 @@ setMethod("first",
#' rdd <- toRDD(df)
#'}
setMethod("toRDD",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
jrdd <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "dfToRowRDD", x@sdf)
colNames <- callJMethod(x@sdf, "columns")
@@ -1064,12 +1064,12 @@ setMethod("toRDD",
#' GroupBy
#'
-#' Groups the DataFrame using the specified columns, so we can run aggregation on them.
+#' Groups the SparkDataFrame using the specified columns, so we can run aggregation on them.
#'
-#' @param x a DataFrame
+#' @param x a SparkDataFrame
#' @return a GroupedData
#' @seealso GroupedData
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname groupBy
#' @name groupBy
#' @export
@@ -1082,7 +1082,7 @@ setMethod("toRDD",
#' agg(groupBy(df, "department", "gender"), salary="avg", "age" -> "max")
#' }
setMethod("groupBy",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, ...) {
cols <- list(...)
if (length(cols) >= 1 && class(cols[[1]]) == "character") {
@@ -1097,7 +1097,7 @@ setMethod("groupBy",
#' @rdname groupBy
#' @name group_by
setMethod("group_by",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, ...) {
groupBy(x, ...)
})
@@ -1106,13 +1106,13 @@ setMethod("group_by",
#'
#' Compute aggregates by specifying a list of columns
#'
-#' @param x a DataFrame
-#' @family DataFrame functions
+#' @param x a SparkDataFrame
+#' @family SparkDataFrame functions
#' @rdname agg
#' @name agg
#' @export
setMethod("agg",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, ...) {
agg(groupBy(x), ...)
})
@@ -1120,7 +1120,7 @@ setMethod("agg",
#' @rdname agg
#' @name summarize
setMethod("summarize",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, ...) {
agg(x, ...)
})
@@ -1135,7 +1135,7 @@ setMethod("summarize",
#' @rdname lapply
#' @noRd
setMethod("lapply",
- signature(X = "DataFrame", FUN = "function"),
+ signature(X = "SparkDataFrame", FUN = "function"),
function(X, FUN) {
rdd <- toRDD(X)
lapply(rdd, FUN)
@@ -1144,7 +1144,7 @@ setMethod("lapply",
#' @rdname lapply
#' @noRd
setMethod("map",
- signature(X = "DataFrame", FUN = "function"),
+ signature(X = "SparkDataFrame", FUN = "function"),
function(X, FUN) {
lapply(X, FUN)
})
@@ -1152,7 +1152,7 @@ setMethod("map",
#' @rdname flatMap
#' @noRd
setMethod("flatMap",
- signature(X = "DataFrame", FUN = "function"),
+ signature(X = "SparkDataFrame", FUN = "function"),
function(X, FUN) {
rdd <- toRDD(X)
flatMap(rdd, FUN)
@@ -1161,7 +1161,7 @@ setMethod("flatMap",
#' @rdname lapplyPartition
#' @noRd
setMethod("lapplyPartition",
- signature(X = "DataFrame", FUN = "function"),
+ signature(X = "SparkDataFrame", FUN = "function"),
function(X, FUN) {
rdd <- toRDD(X)
lapplyPartition(rdd, FUN)
@@ -1170,7 +1170,7 @@ setMethod("lapplyPartition",
#' @rdname lapplyPartition
#' @noRd
setMethod("mapPartitions",
- signature(X = "DataFrame", FUN = "function"),
+ signature(X = "SparkDataFrame", FUN = "function"),
function(X, FUN) {
lapplyPartition(X, FUN)
})
@@ -1178,7 +1178,7 @@ setMethod("mapPartitions",
#' @rdname foreach
#' @noRd
setMethod("foreach",
- signature(x = "DataFrame", func = "function"),
+ signature(x = "SparkDataFrame", func = "function"),
function(x, func) {
rdd <- toRDD(x)
foreach(rdd, func)
@@ -1187,7 +1187,7 @@ setMethod("foreach",
#' @rdname foreach
#' @noRd
setMethod("foreachPartition",
- signature(x = "DataFrame", func = "function"),
+ signature(x = "SparkDataFrame", func = "function"),
function(x, func) {
rdd <- toRDD(x)
foreachPartition(rdd, func)
@@ -1202,14 +1202,14 @@ getColumn <- function(x, c) {
#' @rdname select
#' @name $
-setMethod("$", signature(x = "DataFrame"),
+setMethod("$", signature(x = "SparkDataFrame"),
function(x, name) {
getColumn(x, name)
})
#' @rdname select
#' @name $<-
-setMethod("$<-", signature(x = "DataFrame"),
+setMethod("$<-", signature(x = "SparkDataFrame"),
function(x, name, value) {
stopifnot(class(value) == "Column" || is.null(value))
@@ -1226,7 +1226,7 @@ setClassUnion("numericOrcharacter", c("numeric", "character"))
#' @rdname subset
#' @name [[
-setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
+setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
function(x, i) {
if (is.numeric(i)) {
cols <- columns(x)
@@ -1237,7 +1237,7 @@ setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
#' @rdname subset
#' @name [
-setMethod("[", signature(x = "DataFrame", i = "missing"),
+setMethod("[", signature(x = "SparkDataFrame", i = "missing"),
function(x, i, j, ...) {
if (is.numeric(j)) {
cols <- columns(x)
@@ -1251,7 +1251,7 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
#' @rdname subset
#' @name [
-setMethod("[", signature(x = "DataFrame", i = "Column"),
+setMethod("[", signature(x = "SparkDataFrame", i = "Column"),
function(x, i, j, ...) {
# It could handle i as "character" but it seems confusing and not required
# https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
@@ -1265,13 +1265,15 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
#' Subset
#'
-#' Return subsets of DataFrame according to given conditions
-#' @param x A DataFrame
+#' Return subsets of SparkDataFrame according to given conditions
+#' @param x A SparkDataFrame
#' @param subset (Optional) A logical expression to filter on rows
-#' @param select expression for the single Column or a list of columns to select from the DataFrame
-#' @return A new DataFrame containing only the rows that meet the condition with selected columns
+#' @param select expression for the single Column or a list of columns to select from the
+#' SparkDataFrame
+#' @return A new SparkDataFrame containing only the rows that meet the condition with selected
+#' columns
#' @export
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname subset
#' @name subset
#' @family subsetting functions
@@ -1283,14 +1285,14 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
#' df[,c("name", "age")]
#' # Or to filter rows
#' df[df$age > 20,]
-#' # DataFrame can be subset on both rows and Columns
+#' # SparkDataFrame can be subset on both rows and Columns
#' df[df$name == "Smith", c(1,2)]
#' df[df$age %in% c(19, 30), 1:2]
#' subset(df, df$age %in% c(19, 30), 1:2)
#' subset(df, df$age %in% c(19), select = c(1,2))
#' subset(df, select = c(1,2))
#' }
-setMethod("subset", signature(x = "DataFrame"),
+setMethod("subset", signature(x = "SparkDataFrame"),
function(x, subset, select, ...) {
if (missing(subset)) {
x[, select, ...]
@@ -1302,11 +1304,11 @@ setMethod("subset", signature(x = "DataFrame"),
#' Select
#'
#' Selects a set of columns with names or Column expressions.
-#' @param x A DataFrame
+#' @param x A SparkDataFrame
#' @param col A list of columns or single Column or name
-#' @return A new DataFrame with selected columns
+#' @return A new SparkDataFrame with selected columns
#' @export
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname select
#' @name select
#' @family subsetting functions
@@ -1320,7 +1322,7 @@ setMethod("subset", signature(x = "DataFrame"),
#' # Similar to R data frames columns can also be selected using `$`
#' df[,df$age]
#' }
-setMethod("select", signature(x = "DataFrame", col = "character"),
+setMethod("select", signature(x = "SparkDataFrame", col = "character"),
function(x, col, ...) {
if (length(col) > 1) {
if (length(list(...)) > 0) {
@@ -1334,10 +1336,10 @@ setMethod("select", signature(x = "DataFrame", col = "character"),
}
})
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname select
#' @export
-setMethod("select", signature(x = "DataFrame", col = "Column"),
+setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
function(x, col, ...) {
jcols <- lapply(list(col, ...), function(c) {
c@jc
@@ -1346,11 +1348,11 @@ setMethod("select", signature(x = "DataFrame", col = "Column"),
dataFrame(sdf)
})
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname select
#' @export
setMethod("select",
- signature(x = "DataFrame", col = "list"),
+ signature(x = "SparkDataFrame", col = "list"),
function(x, col) {
cols <- lapply(col, function(c) {
if (class(c) == "Column") {
@@ -1365,13 +1367,13 @@ setMethod("select",
#' SelectExpr
#'
-#' Select from a DataFrame using a set of SQL expressions.
+#' Select from a SparkDataFrame using a set of SQL expressions.
#'
-#' @param x A DataFrame to be selected from.
+#' @param x A SparkDataFrame to be selected from.
#' @param expr A string containing a SQL expression
#' @param ... Additional expressions
-#' @return A DataFrame
-#' @family DataFrame functions
+#' @return A SparkDataFrame
+#' @family SparkDataFrame functions
#' @rdname selectExpr
#' @name selectExpr
#' @export
@@ -1384,7 +1386,7 @@ setMethod("select",
#' selectExpr(df, "col1", "(col2 * 5) as newCol")
#' }
setMethod("selectExpr",
- signature(x = "DataFrame", expr = "character"),
+ signature(x = "SparkDataFrame", expr = "character"),
function(x, expr, ...) {
exprList <- list(expr, ...)
sdf <- callJMethod(x@sdf, "selectExpr", exprList)
@@ -1393,14 +1395,14 @@ setMethod("selectExpr",
#' WithColumn
#'
-#' Return a new DataFrame by adding a column or replacing the existing column
+#' Return a new SparkDataFrame by adding a column or replacing the existing column
#' that has the same name.
#'
-#' @param x A DataFrame
+#' @param x A SparkDataFrame
#' @param colName A column name.
#' @param col A Column expression.
-#' @return A DataFrame with the new column added or the existing column replaced.
-#' @family DataFrame functions
+#' @return A SparkDataFrame with the new column added or the existing column replaced.
+#' @family SparkDataFrame functions
#' @rdname withColumn
#' @name withColumn
#' @seealso \link{rename} \link{mutate}
@@ -1416,7 +1418,7 @@ setMethod("selectExpr",
#' newDF2 <- withColumn(newDF, "newCol", newDF$col1)
#' }
setMethod("withColumn",
- signature(x = "DataFrame", colName = "character", col = "Column"),
+ signature(x = "SparkDataFrame", colName = "character", col = "Column"),
function(x, colName, col) {
sdf <- callJMethod(x@sdf, "withColumn", colName, col@jc)
dataFrame(sdf)
@@ -1424,12 +1426,12 @@ setMethod("withColumn",
#' Mutate
#'
-#' Return a new DataFrame with the specified columns added.
+#' Return a new SparkDataFrame with the specified columns added.
#'
-#' @param .data A DataFrame
+#' @param .data A SparkDataFrame
#' @param col a named argument of the form name = col
-#' @return A new DataFrame with the new columns added.
-#' @family DataFrame functions
+#' @return A new SparkDataFrame with the new columns added.
+#' @family SparkDataFrame functions
#' @rdname mutate
#' @name mutate
#' @seealso \link{rename} \link{withColumn}
@@ -1445,7 +1447,7 @@ setMethod("withColumn",
#' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2)
#' }
setMethod("mutate",
- signature(.data = "DataFrame"),
+ signature(.data = "SparkDataFrame"),
function(.data, ...) {
x <- .data
cols <- list(...)
@@ -1466,20 +1468,20 @@ setMethod("mutate",
#' @rdname mutate
#' @name transform
setMethod("transform",
- signature(`_data` = "DataFrame"),
+ signature(`_data` = "SparkDataFrame"),
function(`_data`, ...) {
mutate(`_data`, ...)
})
#' rename
#'
-#' Rename an existing column in a DataFrame.
+#' Rename an existing column in a SparkDataFrame.
#'
-#' @param x A DataFrame
+#' @param x A SparkDataFrame
#' @param existingCol The name of the column you want to change.
#' @param newCol The new column name.
-#' @return A DataFrame with the column name changed.
-#' @family DataFrame functions
+#' @return A SparkDataFrame with the column name changed.
+#' @family SparkDataFrame functions
#' @rdname rename
#' @name withColumnRenamed
#' @seealso \link{mutate}
@@ -1493,7 +1495,7 @@ setMethod("transform",
#' newDF <- withColumnRenamed(df, "col1", "newCol1")
#' }
setMethod("withColumnRenamed",
- signature(x = "DataFrame", existingCol = "character", newCol = "character"),
+ signature(x = "SparkDataFrame", existingCol = "character", newCol = "character"),
function(x, existingCol, newCol) {
cols <- lapply(columns(x), function(c) {
if (c == existingCol) {
@@ -1518,7 +1520,7 @@ setMethod("withColumnRenamed",
#' newDF <- rename(df, col1 = df$newCol1)
#' }
setMethod("rename",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, ...) {
renameCols <- list(...)
stopifnot(length(renameCols) > 0)
@@ -1541,15 +1543,15 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' Arrange
#'
-#' Sort a DataFrame by the specified column(s).
+#' Sort a SparkDataFrame by the specified column(s).
#'
-#' @param x A DataFrame to be sorted.
+#' @param x A SparkDataFrame to be sorted.
#' @param col A character or Column object vector indicating the fields to sort on
#' @param ... Additional sorting fields
#' @param decreasing A logical argument indicating sorting order for columns when
#' a character vector is specified for col
-#' @return A DataFrame where all elements are sorted.
-#' @family DataFrame functions
+#' @return A SparkDataFrame where all elements are sorted.
+#' @family SparkDataFrame functions
#' @rdname arrange
#' @name arrange
#' @export
@@ -1565,7 +1567,7 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE))
#' }
setMethod("arrange",
- signature(x = "DataFrame", col = "Column"),
+ signature(x = "SparkDataFrame", col = "Column"),
function(x, col, ...) {
jcols <- lapply(list(col, ...), function(c) {
c@jc
@@ -1579,7 +1581,7 @@ setMethod("arrange",
#' @name arrange
#' @export
setMethod("arrange",
- signature(x = "DataFrame", col = "character"),
+ signature(x = "SparkDataFrame", col = "character"),
function(x, col, ..., decreasing = FALSE) {
# all sorting columns
@@ -1611,20 +1613,20 @@ setMethod("arrange",
#' @name orderBy
#' @export
setMethod("orderBy",
- signature(x = "DataFrame", col = "characterOrColumn"),
+ signature(x = "SparkDataFrame", col = "characterOrColumn"),
function(x, col) {
arrange(x, col)
})
#' Filter
#'
-#' Filter the rows of a DataFrame according to a given condition.
+#' Filter the rows of a SparkDataFrame according to a given condition.
#'
-#' @param x A DataFrame to be sorted.
+#' @param x A SparkDataFrame to be sorted.
#' @param condition The condition to filter on. This may either be a Column expression
#' or a string containing a SQL statement
-#' @return A DataFrame containing only the rows that meet the condition.
-#' @family DataFrame functions
+#' @return A SparkDataFrame containing only the rows that meet the condition.
+#' @family SparkDataFrame functions
#' @rdname filter
#' @name filter
#' @family subsetting functions
@@ -1639,7 +1641,7 @@ setMethod("orderBy",
#' filter(df, df$col2 != "abcdefg")
#' }
setMethod("filter",
- signature(x = "DataFrame", condition = "characterOrColumn"),
+ signature(x = "SparkDataFrame", condition = "characterOrColumn"),
function(x, condition) {
if (class(condition) == "Column") {
condition <- condition@jc
@@ -1648,24 +1650,24 @@ setMethod("filter",
dataFrame(sdf)
})
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname filter
#' @name where
setMethod("where",
- signature(x = "DataFrame", condition = "characterOrColumn"),
+ signature(x = "SparkDataFrame", condition = "characterOrColumn"),
function(x, condition) {
filter(x, condition)
})
#' dropDuplicates
#'
-#' Returns a new DataFrame with duplicate rows removed, considering only
+#' Returns a new SparkDataFrame with duplicate rows removed, considering only
#' the subset of columns.
#'
-#' @param x A DataFrame.
+#' @param x A SparkDataFrame.
#' @param colnames A character vector of column names.
-#' @return A DataFrame with duplicate rows removed.
-#' @family DataFrame functions
+#' @return A SparkDataFrame with duplicate rows removed.
+#' @family SparkDataFrame functions
#' @rdname dropduplicates
#' @name dropDuplicates
#' @export
@@ -1679,7 +1681,7 @@ setMethod("where",
#' dropDuplicates(df, c("col1", "col2"))
#' }
setMethod("dropDuplicates",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, colNames = columns(x)) {
stopifnot(class(colNames) == "character")
@@ -1689,17 +1691,17 @@ setMethod("dropDuplicates",
#' Join
#'
-#' Join two DataFrames based on the given join expression.
+#' Join two SparkDataFrames based on the given join expression.
#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
+#' @param x A SparkDataFrame
+#' @param y A SparkDataFrame
#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
#' Column expression. If joinExpr is omitted, join() will perform a Cartesian join
#' @param joinType The type of join to perform. The following join types are available:
#' 'inner', 'outer', 'full', 'fullouter', leftouter', 'left_outer', 'left',
#' 'right_outer', 'rightouter', 'right', and 'leftsemi'. The default joinType is "inner".
-#' @return A DataFrame containing the result of the join operation.
-#' @family DataFrame functions
+#' @return A SparkDataFrame containing the result of the join operation.
+#' @family SparkDataFrame functions
#' @rdname join
#' @name join
#' @seealso \link{merge}
@@ -1715,7 +1717,7 @@ setMethod("dropDuplicates",
#' join(df1, df2, df1$col1 == df2$col2, "right_outer")
#' }
setMethod("join",
- signature(x = "DataFrame", y = "DataFrame"),
+ signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y, joinExpr = NULL, joinType = NULL) {
if (is.null(joinExpr)) {
sdf <- callJMethod(x@sdf, "join", y@sdf)
@@ -1757,7 +1759,7 @@ setMethod("join",
#' be returned. If all.x is set to FALSE and all.y is set to TRUE, a right
#' outer join will be returned. If all.x and all.y are set to TRUE, a full
#' outer join will be returned.
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname merge
#' @seealso \link{join}
#' @export
@@ -1776,7 +1778,7 @@ setMethod("join",
#' merge(df1, df2, by = "col1", all = TRUE, suffixes = c("-X", "-Y"))
#' }
setMethod("merge",
- signature(x = "DataFrame", y = "DataFrame"),
+ signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by,
all = FALSE, all.x = all, all.y = all,
sort = TRUE, suffixes = c("_x", "_y"), ... ) {
@@ -1858,7 +1860,7 @@ setMethod("merge",
#' Creates a list of columns by replacing the intersected ones with aliases.
#' The name of the alias column is formed by concatanating the original column name and a suffix.
#'
-#' @param x a DataFrame on which the
+#' @param x a SparkDataFrame on which the
#' @param intersectedColNames a list of intersected column names
#' @param suffix a suffix for the column name
#' @return list of columns
@@ -1883,14 +1885,14 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
#' rbind
#'
-#' Return a new DataFrame containing the union of rows in this DataFrame
-#' and another DataFrame. This is equivalent to `UNION ALL` in SQL.
-#' Note that this does not remove duplicate rows across the two DataFrames.
+#' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
+#' and another SparkDataFrame. This is equivalent to `UNION ALL` in SQL.
+#' Note that this does not remove duplicate rows across the two SparkDataFrames.
#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
-#' @return A DataFrame containing the result of the union.
-#' @family DataFrame functions
+#' @param x A SparkDataFrame
+#' @param y A SparkDataFrame
+#' @return A SparkDataFrame containing the result of the union.
+#' @family SparkDataFrame functions
#' @rdname rbind
#' @name unionAll
#' @export
@@ -1903,20 +1905,20 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
#' unioned <- unionAll(df, df2)
#' }
setMethod("unionAll",
- signature(x = "DataFrame", y = "DataFrame"),
+ signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y) {
unioned <- callJMethod(x@sdf, "unionAll", y@sdf)
dataFrame(unioned)
})
-#' @title Union two or more DataFrames
-#' @description Returns a new DataFrame containing rows of all parameters.
+#' @title Union two or more SparkDataFrames
+#' @description Returns a new SparkDataFrame containing rows of all parameters.
#'
#' @rdname rbind
#' @name rbind
#' @export
setMethod("rbind",
- signature(... = "DataFrame"),
+ signature(... = "SparkDataFrame"),
function(x, ..., deparse.level = 1) {
if (nargs() == 3) {
unionAll(x, ...)
@@ -1927,13 +1929,13 @@ setMethod("rbind",
#' Intersect
#'
-#' Return a new DataFrame containing rows only in both this DataFrame
-#' and another DataFrame. This is equivalent to `INTERSECT` in SQL.
+#' Return a new SparkDataFrame containing rows only in both this SparkDataFrame
+#' and another SparkDataFrame. This is equivalent to `INTERSECT` in SQL.
#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
-#' @return A DataFrame containing the result of the intersect.
-#' @family DataFrame functions
+#' @param x A SparkDataFrame
+#' @param y A SparkDataFrame
+#' @return A SparkDataFrame containing the result of the intersect.
+#' @family SparkDataFrame functions
#' @rdname intersect
#' @name intersect
#' @export
@@ -1946,7 +1948,7 @@ setMethod("rbind",
#' intersectDF <- intersect(df, df2)
#' }
setMethod("intersect",
- signature(x = "DataFrame", y = "DataFrame"),
+ signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y) {
intersected <- callJMethod(x@sdf, "intersect", y@sdf)
dataFrame(intersected)
@@ -1954,13 +1956,13 @@ setMethod("intersect",
#' except
#'
-#' Return a new DataFrame containing rows in this DataFrame
-#' but not in another DataFrame. This is equivalent to `EXCEPT` in SQL.
+#' Return a new SparkDataFrame containing rows in this SparkDataFrame
+#' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL.
#'
-#' @param x A Spark DataFrame
-#' @param y A Spark DataFrame
-#' @return A DataFrame containing the result of the except operation.
-#' @family DataFrame functions
+#' @param x A SparkDataFrame
+#' @param y A SparkDataFrame
+#' @return A SparkDataFrame containing the result of the except operation.
+#' @family SparkDataFrame functions
#' @rdname except
#' @name except
#' @export
@@ -1975,13 +1977,13 @@ setMethod("intersect",
#' @rdname except
#' @export
setMethod("except",
- signature(x = "DataFrame", y = "DataFrame"),
+ signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y) {
excepted <- callJMethod(x@sdf, "except", y@sdf)
dataFrame(excepted)
})
-#' Save the contents of the DataFrame to a data source
+#' Save the contents of the SparkDataFrame to a data source
#'
#' The data source is specified by the `source` and a set of options (...).
#' If `source` is not specified, the default data source configured by
@@ -1989,18 +1991,19 @@ setMethod("except",
#'
#' Additionally, mode is used to specify the behavior of the save operation when
#' data already exists in the data source. There are four modes: \cr
-#' append: Contents of this DataFrame are expected to be appended to existing data. \cr
-#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr
+#' append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
+#' overwrite: Existing data is expected to be overwritten by the contents of this
+#' SparkDataFrame. \cr
#' error: An exception is expected to be thrown. \cr
-#' ignore: The save operation is expected to not save the contents of the DataFrame
+#' ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data. \cr
#'
-#' @param df A SparkSQL DataFrame
+#' @param df A SparkDataFrame
#' @param path A name for the table
#' @param source A name for external data source
#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname write.df
#' @name write.df
#' @export
@@ -2014,7 +2017,7 @@ setMethod("except",
#' saveDF(df, parquetPath2, "parquet", mode = saveMode, mergeSchema = mergeSchema)
#' }
setMethod("write.df",
- signature(df = "DataFrame", path = "character"),
+ signature(df = "SparkDataFrame", path = "character"),
function(df, path, source = NULL, mode = "error", ...){
if (is.null(source)) {
if (exists(".sparkRSQLsc", envir = .sparkREnv)) {
@@ -2042,14 +2045,14 @@ setMethod("write.df",
#' @name saveDF
#' @export
setMethod("saveDF",
- signature(df = "DataFrame", path = "character"),
+ signature(df = "SparkDataFrame", path = "character"),
function(df, path, source = NULL, mode = "error", ...){
write.df(df, path, source, mode, ...)
})
#' saveAsTable
#'
-#' Save the contents of the DataFrame to a data source as a table
+#' Save the contents of the SparkDataFrame to a data source as a table
#'
#' The data source is specified by the `source` and a set of options (...).
#' If `source` is not specified, the default data source configured by
@@ -2057,18 +2060,19 @@ setMethod("saveDF",
#'
#' Additionally, mode is used to specify the behavior of the save operation when
#' data already exists in the data source. There are four modes: \cr
-#' append: Contents of this DataFrame are expected to be appended to existing data. \cr
-#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr
+#' append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
+#' overwrite: Existing data is expected to be overwritten by the contents of this
+#' SparkDataFrame. \cr
#' error: An exception is expected to be thrown. \cr
-#' ignore: The save operation is expected to not save the contents of the DataFrame
+#' ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data. \cr
#'
-#' @param df A SparkSQL DataFrame
+#' @param df A SparkDataFrame
#' @param tableName A name for the table
#' @param source A name for external data source
#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname saveAsTable
#' @name saveAsTable
#' @export
@@ -2081,7 +2085,7 @@ setMethod("saveDF",
#' saveAsTable(df, "myfile")
#' }
setMethod("saveAsTable",
- signature(df = "DataFrame", tableName = "character"),
+ signature(df = "SparkDataFrame", tableName = "character"),
function(df, tableName, source = NULL, mode="error", ...){
if (is.null(source)) {
if (exists(".sparkRSQLsc", envir = .sparkREnv)) {
@@ -2109,11 +2113,11 @@ setMethod("saveAsTable",
#' Computes statistics for numeric columns.
#' If no columns are given, this function computes statistics for all numerical columns.
#'
-#' @param x A DataFrame to be computed.
+#' @param x A SparkDataFrame to be computed.
#' @param col A string of name
#' @param ... Additional expressions
-#' @return A DataFrame
-#' @family DataFrame functions
+#' @return A SparkDataFrame
+#' @family SparkDataFrame functions
#' @rdname summary
#' @name describe
#' @export
@@ -2128,7 +2132,7 @@ setMethod("saveAsTable",
#' describe(df, "col1", "col2")
#' }
setMethod("describe",
- signature(x = "DataFrame", col = "character"),
+ signature(x = "SparkDataFrame", col = "character"),
function(x, col, ...) {
colList <- list(col, ...)
sdf <- callJMethod(x@sdf, "describe", colList)
@@ -2138,7 +2142,7 @@ setMethod("describe",
#' @rdname summary
#' @name describe
setMethod("describe",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x) {
colList <- as.list(c(columns(x)))
sdf <- callJMethod(x@sdf, "describe", colList)
@@ -2148,7 +2152,7 @@ setMethod("describe",
#' @rdname summary
#' @name summary
setMethod("summary",
- signature(object = "DataFrame"),
+ signature(object = "SparkDataFrame"),
function(object, ...) {
describe(object)
})
@@ -2156,9 +2160,9 @@ setMethod("summary",
#' dropna
#'
-#' Returns a new DataFrame omitting rows with null values.
+#' Returns a new SparkDataFrame omitting rows with null values.
#'
-#' @param x A SparkSQL DataFrame.
+#' @param x A SparkDataFrame.
#' @param how "any" or "all".
#' if "any", drop a row if it contains any nulls.
#' if "all", drop a row only if all its values are null.
@@ -2167,9 +2171,9 @@ setMethod("summary",
#' minNonNulls non-null values.
#' This overwrites the how parameter.
#' @param cols Optional list of column names to consider.
-#' @return A DataFrame
+#' @return A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname nafunctions
#' @name dropna
#' @export
@@ -2182,7 +2186,7 @@ setMethod("summary",
#' dropna(df)
#' }
setMethod("dropna",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
how <- match.arg(how)
if (is.null(cols)) {
@@ -2202,7 +2206,7 @@ setMethod("dropna",
#' @name na.omit
#' @export
setMethod("na.omit",
- signature(object = "DataFrame"),
+ signature(object = "SparkDataFrame"),
function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
dropna(object, how, minNonNulls, cols)
})
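A minimal sketch of the dropna()/na.omit() variants described above; `df` is assumed to be a SparkDataFrame that contains nulls (e.g. one read with read.json as in the example above):

cleanedAny <- dropna(df)                   # drop rows containing any null
cleanedAll <- dropna(df, how = "all")      # drop rows whose values are all null
cleanedMin <- dropna(df, minNonNulls = 2)  # keep rows with at least 2 non-null values
cleanedOm  <- na.omit(df)                  # equivalent to dropna(df)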
@@ -2211,7 +2215,7 @@ setMethod("na.omit",
#'
#' Replace null values.
#'
-#' @param x A SparkSQL DataFrame.
+#' @param x A SparkDataFrame.
#' @param value Value to replace null values with.
#' Should be an integer, numeric, character or named list.
#' If the value is a named list, then cols is ignored and
@@ -2237,7 +2241,7 @@ setMethod("na.omit",
#' fillna(df, list("age" = 20, "name" = "unknown"))
#' }
setMethod("fillna",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, value, cols = NULL) {
if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
stop("value should be an integer, numeric, charactor or named list.")
@@ -2280,14 +2284,14 @@ setMethod("fillna",
dataFrame(sdf)
})
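A minimal sketch of fillna(), assuming `df` has a numeric `age` and a string `name` column as in the example above:

filledNum  <- fillna(df, 20)                                    # fills nulls in numeric columns
filledStr  <- fillna(df, "unknown", cols = c("name"))           # restrict to the listed columns
filledBoth <- fillna(df, list("age" = 20, "name" = "unknown"))  # per-column values; cols is ignored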
-#' This function downloads the contents of a DataFrame into an R's data.frame.
+#' This function downloads the contents of a SparkDataFrame into an R's data.frame.
#' Since data.frames are held in memory, ensure that you have enough memory
#' in your system to accommodate the contents.
#'
-#' @title Download data from a DataFrame into a data.frame
-#' @param x a DataFrame
+#' @title Download data from a SparkDataFrame into a data.frame
+#' @param x a SparkDataFrame
#' @return a data.frame
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname as.data.frame
#' @examples \dontrun{
#'
@@ -2295,24 +2299,24 @@ setMethod("fillna",
#' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ])
#' }
setMethod("as.data.frame",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, row.names = NULL, optional = FALSE, ...) {
as.data.frame(collect(x), row.names, optional, ...)
})
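A minimal sketch of as.data.frame(), assuming an initialized `sqlContext`; since the result is collected to the driver, keep it small:

irisDF <- createDataFrame(sqlContext, iris)
localSetosa <- as.data.frame(irisDF[irisDF$Species == "setosa", ])
class(localSetosa)   # "data.frame"
nrow(localSetosa)    # 50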
-#' The specified DataFrame is attached to the R search path. This means that
-#' the DataFrame is searched by R when evaluating a variable, so columns in
-#' the DataFrame can be accessed by simply giving their names.
+#' The specified SparkDataFrame is attached to the R search path. This means that
+#' the SparkDataFrame is searched by R when evaluating a variable, so columns in
+#' the SparkDataFrame can be accessed by simply giving their names.
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname attach
-#' @title Attach DataFrame to R search path
-#' @param what (DataFrame) The DataFrame to attach
+#' @title Attach SparkDataFrame to R search path
+#' @param what (SparkDataFrame) The SparkDataFrame to attach
#' @param pos (integer) Specify position in search() where to attach.
-#' @param name (character) Name to use for the attached DataFrame. Names
+#' @param name (character) Name to use for the attached SparkDataFrame. Names
#' starting with package: are reserved for library.
#' @param warn.conflicts (logical) If TRUE, warnings are printed about conflicts
-#' from attaching the database, unless that DataFrame contains an object
+#' from attaching the database, unless that SparkDataFrame contains an object
#' @examples
#' \dontrun{
#' attach(irisDf)
@@ -2320,21 +2324,21 @@ setMethod("as.data.frame",
#' }
#' @seealso \link{detach}
setMethod("attach",
- signature(what = "DataFrame"),
+ signature(what = "SparkDataFrame"),
function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) {
newEnv <- assignNewEnv(what)
attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
})
-#' Evaluate a R expression in an environment constructed from a DataFrame
-#' with() allows access to columns of a DataFrame by simply referring to
-#' their name. It appends every column of a DataFrame into a new
+#' Evaluate an R expression in an environment constructed from a SparkDataFrame
+#' with() allows access to columns of a SparkDataFrame by simply referring to
+#' their name. It appends every column of a SparkDataFrame into a new
#' environment. Then, the given expression is evaluated in this new
#' environment.
#'
#' @rdname with
-#' @title Evaluate a R expression in an environment constructed from a DataFrame
-#' @param data (DataFrame) DataFrame to use for constructing an environment.
+#' @title Evaluate an R expression in an environment constructed from a SparkDataFrame
+#' @param data (SparkDataFrame) SparkDataFrame to use for constructing an environment.
#' @param expr (expression) Expression to evaluate.
#' @param ... arguments to be passed to future methods.
#' @examples
@@ -2343,28 +2347,28 @@ setMethod("attach",
#' }
#' @seealso \link{attach}
setMethod("with",
- signature(data = "DataFrame"),
+ signature(data = "SparkDataFrame"),
function(data, expr, ...) {
newEnv <- assignNewEnv(data)
eval(substitute(expr), envir = newEnv, enclos = newEnv)
})
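A minimal sketch of attach() and with(), assuming an initialized `sqlContext`; per the tests further down, each column becomes visible by name (as a one-column SparkDataFrame in this version):

irisDF <- createDataFrame(sqlContext, iris)
with(irisDF, head(Sepal_Length))   # refer to a column without the irisDF$ prefix
attach(irisDF)
head(Petal_Width)
detach("irisDF")                   # remove it from the search path again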
-#' Display the structure of a DataFrame, including column names, column types, as well as a
+#' Display the structure of a SparkDataFrame, including column names, column types, as well as
#' a small sample of rows.
#' @name str
#' @title Compactly display the structure of a dataset
#' @rdname str
-#' @family DataFrame functions
-#' @param object a DataFrame
+#' @family SparkDataFrame functions
+#' @param object a SparkDataFrame
#' @examples \dontrun{
-#' # Create a DataFrame from the Iris dataset
+#' # Create a SparkDataFrame from the Iris dataset
#' irisDF <- createDataFrame(sqlContext, iris)
#'
-#' # Show the structure of the DataFrame
+#' # Show the structure of the SparkDataFrame
#' str(irisDF)
#' }
setMethod("str",
- signature(object = "DataFrame"),
+ signature(object = "SparkDataFrame"),
function(object) {
# TODO: These could be made global parameters, though in R it's not the case
@@ -2424,14 +2428,14 @@ setMethod("str",
#' drop
#'
-#' Returns a new DataFrame with columns dropped.
+#' Returns a new SparkDataFrame with columns dropped.
#' This is a no-op if schema doesn't contain column name(s).
-#'
-#' @param x A SparkSQL DataFrame.
+#'
+#' @param x A SparkDataFrame.
#' @param col A character vector of column names or a Column.
-#' @return A DataFrame
+#' @return A SparkDataFrame
#'
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname drop
#' @name drop
#' @export
@@ -2446,7 +2450,7 @@ setMethod("str",
#' drop(df, df$col1)
#' }
setMethod("drop",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, col) {
stopifnot(class(col) == "character" || class(col) == "Column")
@@ -2465,23 +2469,24 @@ setMethod("drop",
base::drop(x)
})
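A minimal sketch of drop(), assuming an initialized `sqlContext`:

irisDF <- createDataFrame(sqlContext, iris)
columns(drop(irisDF, "Species"))           # drop by column name
columns(drop(irisDF, irisDF$Petal_Width))  # drop by Column reference
columns(drop(irisDF, "no_such_column"))    # no-op: the schema is returned unchanged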
-#' Saves the content of the DataFrame to an external database table via JDBC
+#' Saves the content of the SparkDataFrame to an external database table via JDBC
#'
#' Additional JDBC database connection properties can be set (...)
#'
#' Also, mode is used to specify the behavior of the save operation when
#' data already exists in the data source. There are four modes: \cr
-#' append: Contents of this DataFrame are expected to be appended to existing data. \cr
-#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr
+#' append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
+#' overwrite: Existing data is expected to be overwritten by the contents of this
+#' SparkDataFrame. \cr
#' error: An exception is expected to be thrown. \cr
-#' ignore: The save operation is expected to not save the contents of the DataFrame
+#' ignore: The save operation is expected to not save the contents of the SparkDataFrame
#' and to not change the existing data. \cr
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param url JDBC database url of the form `jdbc:subprotocol:subname`
#' @param tableName The name of the table in the external database
#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
-#' @family DataFrame functions
+#' @family SparkDataFrame functions
#' @rdname write.jdbc
#' @name write.jdbc
#' @export
@@ -2493,7 +2498,7 @@ setMethod("drop",
#' write.jdbc(df, jdbcUrl, "table", user = "username", password = "password")
#' }
setMethod("write.jdbc",
- signature(x = "DataFrame", url = "character", tableName = "character"),
+ signature(x = "SparkDataFrame", url = "character", tableName = "character"),
function(x, url, tableName, mode = "error", ...){
jmode <- convertToJSaveMode(mode)
jprops <- varargsToJProperties(...)
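A minimal sketch of write.jdbc(); the JDBC URL, table name and credentials are hypothetical placeholders, and the matching JDBC driver must be on Spark's classpath:

jdbcUrl <- "jdbc:postgresql://localhost:5432/testdb"   # placeholder URL
df <- createDataFrame(sqlContext, faithful)
write.jdbc(df, jdbcUrl, "faithful_tbl", mode = "append",
           user = "username", password = "password")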
diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R
index 35c4e6f1af..34d29ddbfd 100644
--- a/R/pkg/R/RDD.R
+++ b/R/pkg/R/RDD.R
@@ -46,7 +46,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
# RDD has three serialization types:
# byte: The RDD stores data serialized in R.
# string: The RDD stores data as strings.
- # row: The RDD stores the serialized rows of a DataFrame.
+ # row: The RDD stores the serialized rows of a SparkDataFrame.
# We use an environment to store mutable states inside an RDD object.
# Note that R's call-by-value semantics makes modifying slots inside an
@@ -114,7 +114,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
#' @noRd
#' @param jrdd Java object reference to the backing JavaRDD
#' @param serializedMode Use "byte" if the RDD stores data serialized in R, "string" if the RDD
-#' stores strings, and "row" if the RDD stores the rows of a DataFrame
+#' stores strings, and "row" if the RDD stores the rows of a SparkDataFrame
#' @param isCached TRUE if the RDD is cached
#' @param isCheckpointed TRUE if the RDD has been checkpointed
RDD <- function(jrdd, serializedMode = "byte", isCached = FALSE,
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index b726c1e1b9..3824e0a995 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -34,7 +34,7 @@ getInternalType <- function(x) {
Date = "date",
POSIXlt = "timestamp",
POSIXct = "timestamp",
- stop(paste("Unsupported type for DataFrame:", class(x))))
+ stop(paste("Unsupported type for SparkDataFrame:", class(x))))
}
#' infer the SQL type
@@ -70,14 +70,14 @@ infer_type <- function(x) {
}
}
-#' Create a DataFrame
+#' Create a SparkDataFrame
#'
-#' Converts R data.frame or list into DataFrame.
+#' Converts an R data.frame or list into a SparkDataFrame.
#'
#' @param sqlContext A SQLContext
#' @param data An RDD or list or data.frame
#' @param schema a list of column names or named list (StructType), optional
-#' @return an DataFrame
+#' @return a SparkDataFrame
#' @rdname createDataFrame
#' @export
#' @examples
@@ -173,11 +173,11 @@ as.DataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
#' toDF
#'
-#' Converts an RDD to a DataFrame by infer the types.
+#' Converts an RDD to a SparkDataFrame by inferring the types.
#'
#' @param x An RDD
#'
-#' @rdname DataFrame
+#' @rdname SparkDataFrame
#' @noRd
#' @examples
#'\dontrun{
@@ -200,14 +200,14 @@ setMethod("toDF", signature(x = "RDD"),
createDataFrame(sqlContext, x, ...)
})
-#' Create a DataFrame from a JSON file.
+#' Create a SparkDataFrame from a JSON file.
#'
-#' Loads a JSON file (one object per line), returning the result as a DataFrame
+#' Loads a JSON file (one object per line), returning the result as a SparkDataFrame
#' It goes through the entire dataset once to determine the schema.
#'
#' @param sqlContext SQLContext to use
#' @param path Path of file to read. A vector of multiple paths is allowed.
-#' @return DataFrame
+#' @return SparkDataFrame
#' @rdname read.json
#' @name read.json
#' @export
@@ -238,13 +238,13 @@ jsonFile <- function(sqlContext, path) {
#' JSON RDD
#'
-#' Loads an RDD storing one JSON object per string as a DataFrame.
+#' Loads an RDD storing one JSON object per string as a SparkDataFrame.
#'
#' @param sqlContext SQLContext to use
#' @param rdd An RDD of JSON string
#' @param schema A StructType object to use as schema
#' @param samplingRatio The ratio of sampling used to infer the schema
-#' @return A DataFrame
+#' @return A SparkDataFrame
#' @noRd
#' @examples
#'\dontrun{
@@ -268,13 +268,13 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
}
}
-#' Create a DataFrame from a Parquet file.
+#' Create a SparkDataFrame from a Parquet file.
#'
-#' Loads a Parquet file, returning the result as a DataFrame.
+#' Loads a Parquet file, returning the result as a SparkDataFrame.
#'
#' @param sqlContext SQLContext to use
#' @param path Path of file to read. A vector of multiple paths is allowed.
-#' @return DataFrame
+#' @return SparkDataFrame
#' @rdname read.parquet
#' @name read.parquet
#' @export
@@ -295,14 +295,14 @@ parquetFile <- function(sqlContext, ...) {
read.parquet(sqlContext, unlist(list(...)))
}
-#' Create a DataFrame from a text file.
+#' Create a SparkDataFrame from a text file.
#'
-#' Loads a text file and returns a DataFrame with a single string column named "value".
-#' Each line in the text file is a new row in the resulting DataFrame.
+#' Loads a text file and returns a SparkDataFrame with a single string column named "value".
+#' Each line in the text file is a new row in the resulting SparkDataFrame.
#'
#' @param sqlContext SQLContext to use
#' @param path Path of file to read. A vector of multiple paths is allowed.
-#' @return DataFrame
+#' @return SparkDataFrame
#' @rdname read.text
#' @name read.text
#' @export
@@ -323,11 +323,11 @@ read.text <- function(sqlContext, path) {
#' SQL Query
#'
-#' Executes a SQL query using Spark, returning the result as a DataFrame.
+#' Executes a SQL query using Spark, returning the result as a SparkDataFrame.
#'
#' @param sqlContext SQLContext to use
#' @param sqlQuery A character vector containing the SQL query
-#' @return DataFrame
+#' @return SparkDataFrame
#' @export
#' @examples
#'\dontrun{
@@ -344,14 +344,14 @@ sql <- function(sqlContext, sqlQuery) {
dataFrame(sdf)
}
-#' Create a DataFrame from a SparkSQL Table
+#' Create a SparkDataFrame from a SparkSQL Table
#'
-#' Returns the specified Table as a DataFrame. The Table must have already been registered
+#' Returns the specified Table as a SparkDataFrame. The Table must have already been registered
#' in the SQLContext.
#'
#' @param sqlContext SQLContext to use
-#' @param tableName The SparkSQL Table to convert to a DataFrame.
-#' @return DataFrame
+#' @param tableName The SparkSQL Table to convert to a SparkDataFrame.
+#' @return SparkDataFrame
#' @rdname tableToDF
#' @name tableToDF
#' @export
@@ -372,11 +372,11 @@ tableToDF <- function(sqlContext, tableName) {
#' Tables
#'
-#' Returns a DataFrame containing names of tables in the given database.
+#' Returns a SparkDataFrame containing names of tables in the given database.
#'
#' @param sqlContext SQLContext to use
#' @param databaseName name of the database
-#' @return a DataFrame
+#' @return a SparkDataFrame
#' @export
#' @examples
#'\dontrun{
@@ -425,7 +425,7 @@ tableNames <- function(sqlContext, databaseName = NULL) {
#'
#' @param sqlContext SQLContext to use
#' @param tableName The name of the table being cached
-#' @return DataFrame
+#' @return SparkDataFrame
#' @export
#' @examples
#'\dontrun{
@@ -447,7 +447,7 @@ cacheTable <- function(sqlContext, tableName) {
#'
#' @param sqlContext SQLContext to use
#' @param tableName The name of the table being uncached
-#' @return DataFrame
+#' @return SparkDataFrame
#' @export
#' @examples
#'\dontrun{
@@ -500,9 +500,9 @@ dropTempTable <- function(sqlContext, tableName) {
callJMethod(sqlContext, "dropTempTable", tableName)
}
-#' Load an DataFrame
+#' Load a SparkDataFrame
#'
-#' Returns the dataset in a data source as a DataFrame
+#' Returns the dataset in a data source as a SparkDataFrame
#'
#' The data source is specified by the `source` and a set of options(...).
#' If `source` is not specified, the default data source configured by
@@ -512,7 +512,7 @@ dropTempTable <- function(sqlContext, tableName) {
#' @param path The path of files to load
#' @param source The name of external data source
#' @param schema The data schema defined in structType
-#' @return DataFrame
+#' @return SparkDataFrame
#' @rdname read.df
#' @name read.df
#' @export
@@ -556,7 +556,7 @@ loadDF <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
#' Create an external table
#'
#' Creates an external table based on the dataset in a data source,
-#' Returns the DataFrame associated with the external table.
+#' Returns a SparkDataFrame associated with the external table.
#'
#' The data source is specified by the `source` and a set of options(...).
#' If `source` is not specified, the default data source configured by
@@ -566,7 +566,7 @@ loadDF <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
#' @param tableName A name of the table
#' @param path The path of files to load
#' @param source the name of external data source
-#' @return DataFrame
+#' @return SparkDataFrame
#' @export
#' @examples
#'\dontrun{
@@ -584,7 +584,7 @@ createExternalTable <- function(sqlContext, tableName, path = NULL, source = NUL
dataFrame(sdf)
}
-#' Create a DataFrame representing the database table accessible via JDBC URL
+#' Create a SparkDataFrame representing the database table accessible via JDBC URL
#'
#' Additional JDBC database connection properties can be set (...)
#'
@@ -605,7 +605,7 @@ createExternalTable <- function(sqlContext, tableName, path = NULL, source = NUL
#' clause expressions used to split the column `partitionColumn` evenly.
#' This defaults to SparkContext.defaultParallelism when unset.
#' @param predicates a list of conditions in the where clause; each one defines one partition
-#' @return DataFrame
+#' @return SparkDataFrame
#' @rdname read.jdbc
#' @name read.jdbc
#' @export
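A minimal sketch of read.jdbc(), mirroring the write.jdbc sketch earlier; the URL, table and credentials are hypothetical placeholders, and the exact partitioning arguments (e.g. `predicates`) follow the roxygen parameters above and should be treated as an assumption:

jdbcUrl <- "jdbc:postgresql://localhost:5432/testdb"   # placeholder URL
df1 <- read.jdbc(sqlContext, jdbcUrl, "faithful_tbl",
                 user = "username", password = "password")
# Alternatively, supply explicit predicates, one per partition:
df2 <- read.jdbc(sqlContext, jdbcUrl, "faithful_tbl",
                 predicates = list("waiting > 70", "waiting <= 70"),
                 user = "username", password = "password")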
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 3ffd9a9890..a3e09372bb 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -22,11 +22,11 @@ NULL
setOldClass("jobj")
-#' @title S4 class that represents a DataFrame column
-#' @description The column class supports unary, binary operations on DataFrame columns
+#' @title S4 class that represents a SparkDataFrame column
+#' @description The column class supports unary, binary operations on SparkDataFrame columns
#' @rdname column
#'
-#' @slot jc reference to JVM DataFrame column
+#' @slot jc reference to JVM SparkDataFrame column
#' @export
setClass("Column",
slots = list(jc = "jobj"))
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index eefdf17873..ce071b1a84 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -139,7 +139,7 @@ readEnv <- function(con) {
env
}
-# Read a field of StructType from DataFrame
+# Read a field of StructType from SparkDataFrame
# into a named list in R whose class is "struct"
readStruct <- function(con) {
names <- readObject(con)
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 54234b0455..4a0bdf3315 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -2097,7 +2097,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri
#' expr
#'
#' Parses the expression string into the column that it represents, similar to
-#' DataFrame.selectExpr
+#' SparkDataFrame.selectExpr
#'
#' @family normal_funcs
#' @rdname expr
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6b67258d77..04274a12bc 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -385,7 +385,7 @@ setGeneric("subtractByKey",
setGeneric("value", function(bcast) { standardGeneric("value") })
-#################### DataFrame Methods ########################
+#################### SparkDataFrame Methods ########################
#' @rdname agg
#' @export
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 23b49aebda..08f4a490c8 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -23,7 +23,7 @@ NULL
setOldClass("jobj")
#' @title S4 class that represents a GroupedData
-#' @description GroupedDatas can be created using groupBy() on a DataFrame
+#' @description GroupedDatas can be created using groupBy() on a SparkDataFrame
#' @rdname GroupedData
#' @seealso groupBy
#'
@@ -37,7 +37,7 @@ setMethod("initialize", "GroupedData", function(.Object, sgd) {
.Object
})
-#' @rdname DataFrame
+#' @rdname GroupedData
groupedData <- function(sgd) {
new("GroupedData", sgd)
}
@@ -52,10 +52,10 @@ setMethod("show", "GroupedData",
#' Count
#'
#' Count the number of rows for each group.
-#' The resulting DataFrame will also contain the grouping columns.
+#' The resulting SparkDataFrame will also contain the grouping columns.
#'
#' @param x a GroupedData
-#' @return a DataFrame
+#' @return a SparkDataFrame
#' @rdname agg
#' @export
#' @examples
@@ -70,14 +70,14 @@ setMethod("count",
#' summarize
#'
-#' Aggregates on the entire DataFrame without groups.
-#' The resulting DataFrame will also contain the grouping columns.
+#' Aggregates on the entire SparkDataFrame without groups.
+#' The resulting SparkDataFrame will also contain the grouping columns.
#'
#' df2 <- agg(df, <column> = <aggFunction>)
#' df2 <- agg(df, newColName = aggFunction(column))
#'
#' @param x a GroupedData
-#' @return a DataFrame
+#' @return a SparkDataFrame
#' @rdname summarize
#' @name agg
#' @family agg_funcs
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 922a9b13db..7dd82963a1 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -43,7 +43,7 @@ setClass("KMeansModel", representation(jobj = "jobj"))
#'
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param data DataFrame for training.
+#' @param data SparkDataFrame for training.
#' @param family A description of the error distribution and link function to be used in the model.
#' This can be a character string naming a family function, a family function or
#' the result of a call to a family function. Refer R family at
@@ -62,7 +62,7 @@ setClass("KMeansModel", representation(jobj = "jobj"))
#' model <- glm(Sepal_Length ~ Sepal_Width, df, family="gaussian")
#' summary(model)
#' }
-setMethod("glm", signature(formula = "formula", family = "ANY", data = "DataFrame"),
+setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"),
function(formula, family = gaussian, data, epsilon = 1e-06, maxit = 25) {
if (is.character(family)) {
family <- get(family, mode = "function", envir = parent.frame())
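A minimal sketch of glm() on a SparkDataFrame, assuming an initialized `sqlContext`:

irisDF <- createDataFrame(sqlContext, iris)
model <- glm(Sepal_Length ~ Sepal_Width + Petal_Length, data = irisDF, family = "gaussian")
summary(model)                                    # coefficients and related statistics
preds <- predict(model, irisDF)                   # adds a "prediction" column
head(select(preds, "Sepal_Length", "prediction"))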
@@ -155,8 +155,8 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
#' Makes predictions from a generalized linear model produced by glm(), similarly to R's predict().
#'
#' @param object A fitted generalized linear model
-#' @param newData DataFrame for testing
-#' @return DataFrame containing predicted labels in a column named "prediction"
+#' @param newData SparkDataFrame for testing
+#' @return SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
@@ -175,8 +175,8 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
#' Makes predictions from a model produced by naiveBayes(), similarly to R package e1071's predict.
#'
#' @param object A fitted naive Bayes model
-#' @param newData DataFrame for testing
-#' @return DataFrame containing predicted labels in a column named "prediction"
+#' @param newData SparkDataFrame for testing
+#' @return SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
@@ -223,7 +223,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
#'
#' Fit a k-means model, similarly to R's kmeans().
#'
-#' @param x DataFrame for training
+#' @param x SparkDataFrame for training
#' @param centers Number of centers
#' @param iter.max Maximum iteration number
#' @param algorithm Algorithm chosen to fit the model
@@ -234,7 +234,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
#' \dontrun{
#' model <- kmeans(x, centers = 2, algorithm="random")
#' }
-setMethod("kmeans", signature(x = "DataFrame"),
+setMethod("kmeans", signature(x = "SparkDataFrame"),
function(x, centers, iter.max = 10, algorithm = c("random", "k-means||")) {
columnNames <- as.array(colnames(x))
algorithm <- match.arg(algorithm)
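A minimal sketch of kmeans() on a SparkDataFrame; the model is fit on numeric columns only, so the string column of `iris` is excluded first:

irisDF <- createDataFrame(sqlContext, iris)
training <- select(irisDF, "Sepal_Length", "Sepal_Width")
model <- kmeans(training, centers = 3, algorithm = "random")
summary(model)                   # cluster sizes and centers
head(fitted(model))              # fitted values as a SparkDataFrame
head(predict(model, training))   # "prediction" column with cluster assignments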
@@ -248,7 +248,7 @@ setMethod("kmeans", signature(x = "DataFrame"),
#' Get fitted result from a k-means model, similarly to R's fitted().
#'
#' @param object A fitted k-means model
-#' @return DataFrame containing fitted values
+#' @return SparkDataFrame containing fitted values
#' @rdname fitted
#' @export
#' @examples
@@ -296,8 +296,8 @@ setMethod("summary", signature(object = "KMeansModel"),
#' Make predictions from a model produced by kmeans().
#'
#' @param object A fitted k-means model
-#' @param newData DataFrame for testing
-#' @return DataFrame containing predicted labels in a column named "prediction"
+#' @param newData SparkDataFrame for testing
+#' @return SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
@@ -314,12 +314,12 @@ setMethod("predict", signature(object = "KMeansModel"),
#' Fit a Bernoulli naive Bayes model
#'
#' Fit a Bernoulli naive Bayes model, similarly to R package e1071's naiveBayes() while only
-#' categorical features are supported. The input should be a DataFrame of observations instead of a
-#' contingency table.
+#' categorical features are supported. The input should be a SparkDataFrame of observations instead
+#' of a contingency table.
#'
#' @param object A symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param data DataFrame for training
+#' @param data SparkDataFrame for training
#' @param laplace Smoothing parameter
#' @return a fitted naive Bayes model
#' @rdname naiveBayes
@@ -330,7 +330,7 @@ setMethod("predict", signature(object = "KMeansModel"),
#' df <- createDataFrame(sqlContext, infert)
#' model <- naiveBayes(education ~ ., df, laplace = 0)
#'}
-setMethod("naiveBayes", signature(formula = "formula", data = "DataFrame"),
+setMethod("naiveBayes", signature(formula = "formula", data = "SparkDataFrame"),
function(formula, data, laplace = 0, ...) {
formula <- paste(deparse(formula), collapse = "")
jobj <- callJStatic("org.apache.spark.ml.r.NaiveBayesWrapper", "fit",
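A minimal sketch built on the example above (base R's `infert` dataset), assuming an initialized `sqlContext`:

df <- createDataFrame(sqlContext, infert)
model <- naiveBayes(education ~ ., df, laplace = 0)
summary(model)                            # a-priori and conditional probabilities
preds <- predict(model, df)               # "prediction" column with predicted labels
head(select(preds, "education", "prediction"))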
@@ -345,7 +345,7 @@ setMethod("naiveBayes", signature(formula = "formula", data = "DataFrame"),
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
#' operators are supported, including '~', ':', '+', and '-'.
#' Note that operator '.' is not supported currently.
-#' @param data DataFrame for training.
+#' @param data SparkDataFrame for training.
#' @return a fitted AFT survival regression model
#' @rdname survreg
#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/}
@@ -355,7 +355,7 @@ setMethod("naiveBayes", signature(formula = "formula", data = "DataFrame"),
#' df <- createDataFrame(sqlContext, ovarian)
#' model <- survreg(Surv(futime, fustat) ~ ecog_ps + rx, df)
#' }
-setMethod("survreg", signature(formula = "formula", data = "DataFrame"),
+setMethod("survreg", signature(formula = "formula", data = "SparkDataFrame"),
function(formula, data, ...) {
formula <- paste(deparse(formula), collapse = "")
jobj <- callJStatic("org.apache.spark.ml.r.AFTSurvivalRegressionWrapper",
@@ -393,8 +393,8 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
#' Make predictions from a model produced by survreg(), similarly to R package survival's predict.
#'
#' @param object A fitted AFT survival regression model
-#' @param newData DataFrame for testing
-#' @return DataFrame containing predicted labels in a column named "prediction"
+#' @param newData SparkDataFrame for testing
+#' @return SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index c6ddb56227..039aa008b3 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -16,11 +16,11 @@
#
# A set of S3 classes and methods that support the SparkSQL `StructType` and `StructField
-# datatypes. These are used to create and interact with DataFrame schemas.
+# datatypes. These are used to create and interact with SparkDataFrame schemas.
#' structType
#'
-#' Create a structType object that contains the metadata for a DataFrame. Intended for
+#' Create a structType object that contains the metadata for a SparkDataFrame. Intended for
#' use with createDataFrame and toDF.
#'
#' @param x a structField object (created with the field() function)
@@ -171,7 +171,7 @@ checkType <- function(type) {
})
}
- stop(paste("Unsupported type for Dataframe:", type))
+ stop(paste("Unsupported type for SparkDataframe:", type))
}
structField.character <- function(x, type, nullable = TRUE) {
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index edf72937c6..879b664421 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -15,7 +15,7 @@
# limitations under the License.
#
-# stats.R - Statistic functions for DataFrames.
+# stats.R - Statistic functions for SparkDataFrames.
setOldClass("jobj")
@@ -41,7 +41,7 @@ setOldClass("jobj")
#' ct <- crosstab(df, "title", "gender")
#' }
setMethod("crosstab",
- signature(x = "DataFrame", col1 = "character", col2 = "character"),
+ signature(x = "SparkDataFrame", col1 = "character", col2 = "character"),
function(x, col1, col2) {
statFunctions <- callJMethod(x@sdf, "stat")
sct <- callJMethod(statFunctions, "crosstab", col1, col2)
@@ -50,9 +50,9 @@ setMethod("crosstab",
#' cov
#'
-#' Calculate the sample covariance of two numerical columns of a DataFrame.
+#' Calculate the sample covariance of two numerical columns of a SparkDataFrame.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param col1 the name of the first column
#' @param col2 the name of the second column
#' @return the covariance of the two columns.
@@ -66,7 +66,7 @@ setMethod("crosstab",
#' cov <- cov(df, "title", "gender")
#' }
setMethod("cov",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, col1, col2) {
stopifnot(class(col1) == "character" && class(col2) == "character")
statFunctions <- callJMethod(x@sdf, "stat")
@@ -75,11 +75,11 @@ setMethod("cov",
#' corr
#'
-#' Calculates the correlation of two columns of a DataFrame.
+#' Calculates the correlation of two columns of a SparkDataFrame.
#' Currently only supports the Pearson Correlation Coefficient.
#' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param col1 the name of the first column
#' @param col2 the name of the second column
#' @param method Optional. A character specifying the method for calculating the correlation.
@@ -96,7 +96,7 @@ setMethod("cov",
#' corr <- corr(df, "title", "gender", method = "pearson")
#' }
setMethod("corr",
- signature(x = "DataFrame"),
+ signature(x = "SparkDataFrame"),
function(x, col1, col2, method = "pearson") {
stopifnot(class(col1) == "character" && class(col2) == "character")
statFunctions <- callJMethod(x@sdf, "stat")
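A minimal sketch of cov() and corr() on two numeric columns, assuming an initialized `sqlContext`:

mtcarsDF <- createDataFrame(sqlContext, mtcars)
cov(mtcarsDF, "mpg", "wt")                        # sample covariance as a numeric value
corr(mtcarsDF, "mpg", "wt")                       # Pearson correlation (the default)
corr(mtcarsDF, "mpg", "wt", method = "pearson")   # method can also be given explicitly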
@@ -109,7 +109,7 @@ setMethod("corr",
#' Using the frequent element count algorithm described in
#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
#'
-#' @param x A SparkSQL DataFrame.
+#' @param x A SparkDataFrame.
#' @param cols A vector of column names to search frequent items in.
#' @param support (Optional) The minimum frequency for an item to be considered `frequent`.
#' Should be greater than 1e-4. Default support = 0.01.
@@ -123,7 +123,7 @@ setMethod("corr",
#' df <- jsonFile(sqlContext, "/path/to/file.json")
#' fi = freqItems(df, c("title", "gender"))
#' }
-setMethod("freqItems", signature(x = "DataFrame", cols = "character"),
+setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
function(x, cols, support = 0.01) {
statFunctions <- callJMethod(x@sdf, "stat")
sct <- callJMethod(statFunctions, "freqItems", as.list(cols), support)
@@ -132,18 +132,18 @@ setMethod("freqItems", signature(x = "DataFrame", cols = "character"),
#' approxQuantile
#'
-#' Calculates the approximate quantiles of a numerical column of a DataFrame.
+#' Calculates the approximate quantiles of a numerical column of a SparkDataFrame.
#'
#' The result of this algorithm has the following deterministic bound:
-#' If the DataFrame has N elements and if we request the quantile at probability `p` up to error
-#' `err`, then the algorithm will return a sample `x` from the DataFrame so that the *exact* rank
-#' of `x` is close to (p * N). More precisely,
+#' If the SparkDataFrame has N elements and if we request the quantile at probability `p` up to
+#' error `err`, then the algorithm will return a sample `x` from the SparkDataFrame so that the
+#' *exact* rank of `x` is close to (p * N). More precisely,
#' floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
#' This method implements a variation of the Greenwald-Khanna algorithm (with some speed
#' optimizations). The algorithm was first presented in [[http://dx.doi.org/10.1145/375663.375670
#' Space-efficient Online Computation of Quantile Summaries]] by Greenwald and Khanna.
#'
-#' @param x A SparkSQL DataFrame.
+#' @param x A SparkDataFrame.
#' @param col The name of the numerical column.
#' @param probabilities A list of quantile probabilities. Each number must belong to [0, 1].
#' For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
@@ -161,7 +161,7 @@ setMethod("freqItems", signature(x = "DataFrame", cols = "character"),
#' quantiles <- approxQuantile(df, "key", c(0.5, 0.8), 0.0)
#' }
setMethod("approxQuantile",
- signature(x = "DataFrame", col = "character",
+ signature(x = "SparkDataFrame", col = "character",
probabilities = "numeric", relativeError = "numeric"),
function(x, col, probabilities, relativeError) {
statFunctions <- callJMethod(x@sdf, "stat")
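A minimal sketch illustrating the rank bound above: `faithful` has N = 272 rows, so for p = 0.5 and err = 0.01 the returned median has rank between floor(0.49 * 272) = 133 and ceil(0.51 * 272) = 139.

df <- createDataFrame(sqlContext, faithful)
approxQuantile(df, "waiting", probabilities = c(0.5, 0.9), relativeError = 0.01)
approxQuantile(df, "waiting", c(0.5, 0.9), 0.0)   # relativeError = 0 requests exact quantiles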
@@ -173,12 +173,12 @@ setMethod("approxQuantile",
#'
#' Returns a stratified sample without replacement based on the fraction given on each stratum.
#'
-#' @param x A SparkSQL DataFrame
+#' @param x A SparkDataFrame
#' @param col column that defines strata
#' @param fractions A named list giving sampling fraction for each stratum. If a stratum is
#' not specified, we treat its fraction as zero.
#' @param seed random seed
-#' @return A new DataFrame that represents the stratified sample
+#' @return A new SparkDataFrame that represents the stratified sample
#'
#' @rdname statfunctions
#' @name sampleBy
@@ -189,7 +189,7 @@ setMethod("approxQuantile",
#' sample <- sampleBy(df, "key", fractions, 36)
#' }
setMethod("sampleBy",
- signature(x = "DataFrame", col = "character",
+ signature(x = "SparkDataFrame", col = "character",
fractions = "list", seed = "numeric"),
function(x, col, fractions, seed) {
fractionsEnv <- convertNamedListToEnv(fractions)
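A minimal sketch of sampleBy(), assuming an initialized `sqlContext`; strata not listed in `fractions` are sampled with fraction zero:

irisDF <- createDataFrame(sqlContext, iris)
fractions <- list(setosa = 0.5, versicolor = 0.1, virginica = 0.1)
strat <- sampleBy(irisDF, "Species", fractions, seed = 36)
collect(count(groupBy(strat, "Species")))   # roughly 25 / 5 / 5 rows per stratum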
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index b425ccf6e7..ba7a611e64 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -626,7 +626,7 @@ convertNamedListToEnv <- function(namedList) {
# Assign a new environment for attach() and with() methods
assignNewEnv <- function(data) {
- stopifnot(class(data) == "DataFrame")
+ stopifnot(class(data) == "SparkDataFrame")
cols <- columns(data)
stopifnot(length(cols) > 0)
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index b923ccf6bb..9bd3975405 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -101,8 +101,8 @@ test_that("create DataFrame from RDD", {
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
df <- createDataFrame(sqlContext, rdd, list("a", "b"))
dfAsDF <- as.DataFrame(sqlContext, rdd, list("a", "b"))
- expect_is(df, "DataFrame")
- expect_is(dfAsDF, "DataFrame")
+ expect_is(df, "SparkDataFrame")
+ expect_is(dfAsDF, "SparkDataFrame")
expect_equal(count(df), 10)
expect_equal(count(dfAsDF), 10)
expect_equal(nrow(df), 10)
@@ -118,21 +118,21 @@ test_that("create DataFrame from RDD", {
df <- createDataFrame(sqlContext, rdd)
dfAsDF <- as.DataFrame(sqlContext, rdd)
- expect_is(df, "DataFrame")
- expect_is(dfAsDF, "DataFrame")
+ expect_is(df, "SparkDataFrame")
+ expect_is(dfAsDF, "SparkDataFrame")
expect_equal(columns(df), c("_1", "_2"))
expect_equal(columns(dfAsDF), c("_1", "_2"))
schema <- structType(structField(x = "a", type = "integer", nullable = TRUE),
structField(x = "b", type = "string", nullable = TRUE))
df <- createDataFrame(sqlContext, rdd, schema)
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(a = x, b = as.character(x)) })
df <- createDataFrame(sqlContext, rdd)
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 10)
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
@@ -155,7 +155,7 @@ test_that("create DataFrame from RDD", {
age = c(19L, 23L, 18L),
height = c(176.5, 181.4, 173.7))
df <- createDataFrame(sqlContext, localDF, schema)
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 3)
expect_equal(columns(df), c("name", "age", "height"))
expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
@@ -218,25 +218,25 @@ test_that("convert NAs to null type in DataFrames", {
test_that("toDF", {
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
df <- toDF(rdd, list("a", "b"))
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 10)
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
df <- toDF(rdd)
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(columns(df), c("_1", "_2"))
schema <- structType(structField(x = "a", type = "integer", nullable = TRUE),
structField(x = "b", type = "string", nullable = TRUE))
df <- toDF(rdd, schema)
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(a = x, b = as.character(x)) })
df <- toDF(rdd)
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 10)
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
@@ -377,7 +377,7 @@ test_that("Collect DataFrame with complex types", {
test_that("read/write json files", {
# Test read.df
df <- read.df(sqlContext, jsonPath, "json")
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 3)
# Test read.df with a user defined schema
@@ -385,17 +385,17 @@ test_that("read/write json files", {
structField("age", type = "double"))
df1 <- read.df(sqlContext, jsonPath, "json", schema)
- expect_is(df1, "DataFrame")
+ expect_is(df1, "SparkDataFrame")
expect_equal(dtypes(df1), list(c("name", "string"), c("age", "double")))
# Test loadDF
df2 <- loadDF(sqlContext, jsonPath, "json", schema)
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "double")))
# Test read.json
df <- read.json(sqlContext, jsonPath)
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 3)
# Test write.df
@@ -408,11 +408,11 @@ test_that("read/write json files", {
# Test read.json()/jsonFile() works with multiple input paths
jsonDF1 <- read.json(sqlContext, c(jsonPath2, jsonPath3))
- expect_is(jsonDF1, "DataFrame")
+ expect_is(jsonDF1, "SparkDataFrame")
expect_equal(count(jsonDF1), 6)
# Suppress warnings because jsonFile is deprecated
jsonDF2 <- suppressWarnings(jsonFile(sqlContext, c(jsonPath2, jsonPath3)))
- expect_is(jsonDF2, "DataFrame")
+ expect_is(jsonDF2, "SparkDataFrame")
expect_equal(count(jsonDF2), 6)
unlink(jsonPath2)
@@ -423,12 +423,12 @@ test_that("jsonRDD() on a RDD with json string", {
rdd <- parallelize(sc, mockLines)
expect_equal(count(rdd), 3)
df <- suppressWarnings(jsonRDD(sqlContext, rdd))
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 3)
rdd2 <- flatMap(rdd, function(x) c(x, x))
df <- suppressWarnings(jsonRDD(sqlContext, rdd2))
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 6)
})
@@ -454,7 +454,7 @@ test_that("registerTempTable() results in a queryable table and sql() results in
df <- read.json(sqlContext, jsonPath)
registerTempTable(df, "table1")
newdf <- sql(sqlContext, "SELECT * FROM table1 where name = 'Michael'")
- expect_is(newdf, "DataFrame")
+ expect_is(newdf, "SparkDataFrame")
expect_equal(count(newdf), 1)
dropTempTable(sqlContext, "table1")
})
@@ -493,7 +493,7 @@ test_that("tableToDF() returns a new DataFrame", {
df <- read.json(sqlContext, jsonPath)
registerTempTable(df, "table1")
tabledf <- tableToDF(sqlContext, "table1")
- expect_is(tabledf, "DataFrame")
+ expect_is(tabledf, "SparkDataFrame")
expect_equal(count(tabledf), 3)
tabledf2 <- tableToDF(sqlContext, "table1")
expect_equal(count(tabledf2), 3)
@@ -595,7 +595,7 @@ test_that("collect() returns a data.frame", {
test_that("limit() returns DataFrame with the correct number of rows", {
df <- read.json(sqlContext, jsonPath)
dfLimited <- limit(df, 2)
- expect_is(dfLimited, "DataFrame")
+ expect_is(dfLimited, "SparkDataFrame")
expect_equal(count(dfLimited), 2)
})
@@ -750,11 +750,11 @@ test_that("distinct(), unique() and dropDuplicates() on DataFrames", {
df <- read.json(sqlContext, jsonPathWithDup)
uniques <- distinct(df)
- expect_is(uniques, "DataFrame")
+ expect_is(uniques, "SparkDataFrame")
expect_equal(count(uniques), 3)
uniques2 <- unique(df)
- expect_is(uniques2, "DataFrame")
+ expect_is(uniques2, "SparkDataFrame")
expect_equal(count(uniques2), 3)
# Test dropDuplicates()
@@ -798,7 +798,7 @@ test_that("sample on a DataFrame", {
df <- read.json(sqlContext, jsonPath)
sampled <- sample(df, FALSE, 1.0)
expect_equal(nrow(collect(sampled)), count(df))
- expect_is(sampled, "DataFrame")
+ expect_is(sampled, "SparkDataFrame")
sampled2 <- sample(df, FALSE, 0.1, 0) # set seed for predictable result
expect_true(count(sampled2) < 3)
@@ -822,11 +822,11 @@ test_that("select operators", {
expect_is(df[[2]], "Column")
expect_is(df[["age"]], "Column")
- expect_is(df[, 1], "DataFrame")
+ expect_is(df[, 1], "SparkDataFrame")
expect_equal(columns(df[, 1]), c("name"))
expect_equal(columns(df[, "age"]), c("age"))
df2 <- df[, c("age", "name")]
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(columns(df2), c("age", "name"))
df$age2 <- df$age
@@ -890,7 +890,7 @@ test_that("subsetting", {
expect_equal(collect(filtered)$name, "Andy")
df2 <- df[df$age == 19, 1]
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(count(df2), 1)
expect_equal(columns(df2), c("name"))
expect_equal(collect(df2)$name, "Justin")
@@ -940,7 +940,7 @@ test_that("column calculation", {
d <- collect(select(df, alias(df$age + 1, "age2")))
expect_equal(names(d), c("age2"))
df2 <- select(df, lower(df$name), abs(df$age))
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(count(df2), 3)
})
@@ -953,30 +953,30 @@ test_that("test HiveContext", {
skip("Hive is not build with SparkSQL, skipped")
})
df <- createExternalTable(hiveCtx, "json", jsonPath, "json")
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(count(df), 3)
df2 <- sql(hiveCtx, "select * from json")
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(count(df2), 3)
jsonPath2 <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
invisible(saveAsTable(df, "json2", "json", "append", path = jsonPath2))
df3 <- sql(hiveCtx, "select * from json2")
- expect_is(df3, "DataFrame")
+ expect_is(df3, "SparkDataFrame")
expect_equal(count(df3), 3)
unlink(jsonPath2)
hivetestDataPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
invisible(saveAsTable(df, "hivetestbl", path = hivetestDataPath))
df4 <- sql(hiveCtx, "select * from hivetestbl")
- expect_is(df4, "DataFrame")
+ expect_is(df4, "SparkDataFrame")
expect_equal(count(df4), 3)
unlink(hivetestDataPath)
parquetDataPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
invisible(saveAsTable(df, "parquetest", "parquet", mode = "overwrite", path = parquetDataPath))
df5 <- sql(hiveCtx, "select * from parquetest")
- expect_is(df5, "DataFrame")
+ expect_is(df5, "SparkDataFrame")
expect_equal(count(df5), 3)
unlink(parquetDataPath)
})
@@ -1272,28 +1272,28 @@ test_that("group by, agg functions", {
gd <- groupBy(df, "name")
expect_is(gd, "GroupedData")
df2 <- count(gd)
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(3, count(df2))
# Also test group_by, summarize, mean
gd1 <- group_by(df, "name")
expect_is(gd1, "GroupedData")
df_summarized <- summarize(gd, mean_age = mean(df$age))
- expect_is(df_summarized, "DataFrame")
+ expect_is(df_summarized, "SparkDataFrame")
expect_equal(3, count(df_summarized))
df3 <- agg(gd, age = "stddev")
- expect_is(df3, "DataFrame")
+ expect_is(df3, "SparkDataFrame")
df3_local <- collect(df3)
expect_true(is.nan(df3_local[df3_local$name == "Andy", ][1, 2]))
df4 <- agg(gd, sumAge = sum(df$age))
- expect_is(df4, "DataFrame")
+ expect_is(df4, "SparkDataFrame")
expect_equal(3, count(df4))
expect_equal(columns(df4), c("name", "sumAge"))
df5 <- sum(gd, "age")
- expect_is(df5, "DataFrame")
+ expect_is(df5, "SparkDataFrame")
expect_equal(3, count(df5))
expect_equal(3, count(mean(gd)))
@@ -1521,22 +1521,22 @@ test_that("unionAll(), rbind(), except(), and intersect() on a DataFrame", {
df2 <- read.df(sqlContext, jsonPath2, "json")
unioned <- arrange(unionAll(df, df2), df$age)
- expect_is(unioned, "DataFrame")
+ expect_is(unioned, "SparkDataFrame")
expect_equal(count(unioned), 6)
expect_equal(first(unioned)$name, "Michael")
unioned2 <- arrange(rbind(unioned, df, df2), df$age)
- expect_is(unioned2, "DataFrame")
+ expect_is(unioned2, "SparkDataFrame")
expect_equal(count(unioned2), 12)
expect_equal(first(unioned2)$name, "Michael")
excepted <- arrange(except(df, df2), desc(df$age))
- expect_is(unioned, "DataFrame")
+ expect_is(unioned, "SparkDataFrame")
expect_equal(count(excepted), 2)
expect_equal(first(excepted)$name, "Justin")
intersected <- arrange(intersect(df, df2), df$age)
- expect_is(unioned, "DataFrame")
+ expect_is(unioned, "SparkDataFrame")
expect_equal(count(intersected), 1)
expect_equal(first(intersected)$name, "Andy")
@@ -1601,7 +1601,7 @@ test_that("read/write Parquet files", {
# Test write.df and read.df
write.df(df, parquetPath, "parquet", mode = "overwrite")
df2 <- read.df(sqlContext, parquetPath, "parquet")
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(count(df2), 3)
# Test write.parquet/saveAsParquetFile and read.parquet/parquetFile
@@ -1610,10 +1610,10 @@ test_that("read/write Parquet files", {
parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet")
suppressWarnings(saveAsParquetFile(df, parquetPath3))
parquetDF <- read.parquet(sqlContext, c(parquetPath2, parquetPath3))
- expect_is(parquetDF, "DataFrame")
+ expect_is(parquetDF, "SparkDataFrame")
expect_equal(count(parquetDF), count(df) * 2)
parquetDF2 <- suppressWarnings(parquetFile(sqlContext, parquetPath2, parquetPath3))
- expect_is(parquetDF2, "DataFrame")
+ expect_is(parquetDF2, "SparkDataFrame")
expect_equal(count(parquetDF2), count(df) * 2)
# Test if varargs works with variables
@@ -1630,7 +1630,7 @@ test_that("read/write Parquet files", {
test_that("read/write text files", {
# Test write.df and read.df
df <- read.df(sqlContext, jsonPath, "text")
- expect_is(df, "DataFrame")
+ expect_is(df, "SparkDataFrame")
expect_equal(colnames(df), c("value"))
expect_equal(count(df), 3)
textPath <- tempfile(pattern = "textPath", fileext = ".txt")
@@ -1640,7 +1640,7 @@ test_that("read/write text files", {
textPath2 <- tempfile(pattern = "textPath2", fileext = ".txt")
write.text(df, textPath2)
df2 <- read.text(sqlContext, c(textPath, textPath2))
- expect_is(df2, "DataFrame")
+ expect_is(df2, "SparkDataFrame")
expect_equal(colnames(df2), c("value"))
expect_equal(count(df2), count(df) * 2)
@@ -1877,7 +1877,7 @@ test_that("attach() on a DataFrame", {
df <- read.json(sqlContext, jsonPath)
expect_error(age)
attach(df)
- expect_is(age, "DataFrame")
+ expect_is(age, "SparkDataFrame")
expected_age <- data.frame(age = c(NA, 30, 19))
expect_equal(head(age), expected_age)
stat <- summary(age)
@@ -1936,7 +1936,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", {
expect_equal(dtypes(df), list(c("name", "string"), c("age", "double")))
expect_error(coltypes(df) <- c("character"),
- "Length of type vector should match the number of columns for DataFrame")
+ "Length of type vector should match the number of columns for SparkDataFrame")
expect_error(coltypes(df) <- c("environment", "list"),
"Only atomic type is supported for column types")
})
@@ -1950,7 +1950,7 @@ test_that("Method str()", {
out <- capture.output(str(irisDF2))
expect_equal(length(out), 7)
- expect_equal(out[1], "'DataFrame': 6 variables:")
+ expect_equal(out[1], "'SparkDataFrame': 6 variables:")
expect_equal(out[2], " $ Sepal_Length: num 5.1 4.9 4.7 4.6 5 5.4")
expect_equal(out[3], " $ Sepal_Width : num 3.5 3 3.2 3.1 3.6 3.9")
expect_equal(out[4], " $ Petal_Length: num 1.4 1.4 1.3 1.5 1.4 1.7")