path: root/R/pkg
author     lewuathe <lewuathe@me.com>  2015-11-03 16:38:22 -0800
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>  2015-11-03 16:38:22 -0800
commit     d648a4ad546eb05deab1005e92b815b2cbea621b (patch)
tree       95a7a41e34527a3f1851f2ce201417b23c0ed994 /R/pkg
parent     5051262d4ca6a2c529c9b1ba86d54cce60a7af17 (diff)
[DOC] Missing link to R DataFrame API doc
Author: lewuathe <lewuathe@me.com>
Author: Lewuathe <lewuathe@me.com>

Closes #9394 from Lewuathe/missing-link-to-R-dataframe.
Diffstat (limited to 'R/pkg')
-rw-r--r--  R/pkg/R/DataFrame.R  105
1 file changed, 97 insertions(+), 8 deletions(-)
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 87a2c66ffd..df5bc81371 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -23,15 +23,23 @@ NULL
setOldClass("jobj")
#' @title S4 class that represents a DataFrame
-#' @description DataFrames can be created using functions like
-#' \code{jsonFile}, \code{table} etc.
+#' @description DataFrames can be created using functions like \link{createDataFrame},
+#' \link{jsonFile}, \link{table} etc.
+#' @family dataframe_funcs
#' @rdname DataFrame
-#' @seealso jsonFile, table
#' @docType class
#'
#' @slot env An R environment that stores bookkeeping states of the DataFrame
#' @slot sdf A Java object reference to the backing Scala DataFrame
+#' @seealso \link{createDataFrame}, \link{jsonFile}, \link{table}
+#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes}
#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' df <- createDataFrame(sqlContext, faithful)
+#'}
setClass("DataFrame",
slots = list(env = "environment",
sdf = "jobj"))
@@ -46,7 +54,6 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
#' @rdname DataFrame
#' @export
-#'
#' @param sdf A Java object reference to the backing Scala DataFrame
#' @param isCached TRUE if the dataFrame is cached
dataFrame <- function(sdf, isCached = FALSE) {
@@ -61,6 +68,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname printSchema
#' @name printSchema
#' @export
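For readers skimming these roxygen changes, a minimal usage sketch for printSchema (not part of the patch; it assumes the SparkR API of this era, and faithful is R's built-in dataset):

    sc <- sparkR.init()
    sqlContext <- sparkRSQL.init(sc)
    df <- createDataFrame(sqlContext, faithful)
    printSchema(df)  # prints the schema as a tree: eruptions and waiting, both double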
@@ -85,6 +93,7 @@ setMethod("printSchema",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname schema
#' @name schema
#' @export
@@ -108,6 +117,7 @@ setMethod("schema",
#'
#' @param x A SparkSQL DataFrame
#' @param extended Logical. If extended is FALSE, explain() only prints the physical plan.
+#' @family dataframe_funcs
#' @rdname explain
#' @name explain
#' @export
@@ -138,6 +148,7 @@ setMethod("explain",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname isLocal
#' @name isLocal
#' @export
@@ -162,6 +173,7 @@ setMethod("isLocal",
#' @param x A SparkSQL DataFrame
#' @param numRows The number of rows to print. Defaults to 20.
#'
+#' @family dataframe_funcs
#' @rdname showDF
#' @name showDF
#' @export
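A quick showDF sketch, reusing df from the printSchema example above (not part of the patch):

    showDF(df, numRows = 10)  # renders the first 10 rows as a formatted text table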
@@ -186,6 +198,7 @@ setMethod("showDF",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname show
#' @name show
#' @export
@@ -212,6 +225,7 @@ setMethod("show", "DataFrame",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname dtypes
#' @name dtypes
#' @export
@@ -237,6 +251,7 @@ setMethod("dtypes",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname columns
#' @name columns
#' @aliases names
@@ -257,6 +272,7 @@ setMethod("columns",
})
})
+#' @family dataframe_funcs
#' @rdname columns
#' @name names
setMethod("names",
@@ -265,6 +281,7 @@ setMethod("names",
columns(x)
})
+#' @family dataframe_funcs
#' @rdname columns
#' @name names<-
setMethod("names<-",
@@ -283,6 +300,7 @@ setMethod("names<-",
#' @param x A SparkSQL DataFrame
#' @param tableName A character vector containing the name of the table
#'
+#' @family dataframe_funcs
#' @rdname registerTempTable
#' @name registerTempTable
#' @export
@@ -310,6 +328,7 @@ setMethod("registerTempTable",
#' @param overwrite A logical argument indicating whether or not to overwrite
#' the existing rows in the table.
#'
+#' @family dataframe_funcs
#' @rdname insertInto
#' @name insertInto
#' @export
@@ -334,6 +353,7 @@ setMethod("insertInto",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname cache
#' @name cache
#' @export
@@ -360,6 +380,8 @@ setMethod("cache",
#' \url{http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence}.
#'
#' @param x The DataFrame to persist
+#'
+#' @family dataframe_funcs
#' @rdname persist
#' @name persist
#' @export
@@ -386,6 +408,8 @@ setMethod("persist",
#'
#' @param x The DataFrame to unpersist
#' @param blocking Whether to block until all blocks are deleted
+#'
+#' @family dataframe_funcs
#' @rdname unpersist-methods
#' @name unpersist
#' @export
@@ -412,6 +436,8 @@ setMethod("unpersist",
#'
#' @param x A SparkSQL DataFrame
#' @param numPartitions The number of partitions to use.
+#'
+#' @family dataframe_funcs
#' @rdname repartition
#' @name repartition
#' @export
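A hedged repartition sketch, reusing df from above; numPartitions changes only the physical layout, not the rows:

    df2 <- repartition(df, numPartitions = 2)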
@@ -435,8 +461,10 @@ setMethod("repartition",
# Convert the rows of a DataFrame into JSON objects and return an RDD where
# each element contains a JSON string.
#
-#@param x A SparkSQL DataFrame
+# @param x A SparkSQL DataFrame
# @return A StringRRDD of JSON objects
+#
+# @family dataframe_funcs
# @rdname tojson
# @export
# @examples
@@ -462,6 +490,8 @@ setMethod("toJSON",
#'
#' @param x A SparkSQL DataFrame
#' @param path The directory where the file is saved
+#'
+#' @family dataframe_funcs
#' @rdname saveAsParquetFile
#' @name saveAsParquetFile
#' @export
@@ -484,6 +514,8 @@ setMethod("saveAsParquetFile",
#' Return a new DataFrame containing the distinct rows in this DataFrame.
#'
#' @param x A SparkSQL DataFrame
+#'
+#' @family dataframe_funcs
#' @rdname distinct
#' @name distinct
#' @export
@@ -506,6 +538,7 @@ setMethod("distinct",
#
#' @description Returns a new DataFrame containing distinct rows in this DataFrame
#'
+#' @family dataframe_funcs
#' @rdname unique
#' @name unique
#' @aliases distinct
@@ -522,6 +555,8 @@ setMethod("unique",
#' @param x A SparkSQL DataFrame
#' @param withReplacement Sampling with replacement or not
#' @param fraction The (rough) sample target fraction
+#'
+#' @family dataframe_funcs
#' @rdname sample
#' @aliases sample_frac
#' @export
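A sample sketch, reusing df from above; the fraction is a target, so the returned row count is approximate:

    half <- sample(df, withReplacement = FALSE, fraction = 0.5)
    count(half)  # roughly half of nrow(faithful), not exactly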
@@ -545,6 +580,7 @@ setMethod("sample",
dataFrame(sdf)
})
+#' @family dataframe_funcs
#' @rdname sample
#' @name sample_frac
setMethod("sample_frac",
@@ -560,6 +596,7 @@ setMethod("sample_frac",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname count
#' @name count
#' @aliases nrow
@@ -583,6 +620,7 @@ setMethod("count",
#'
#' @name nrow
#'
+#' @family dataframe_funcs
#' @rdname nrow
#' @aliases count
setMethod("nrow",
@@ -595,6 +633,7 @@ setMethod("nrow",
#'
#' @param x a SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname ncol
#' @name ncol
#' @export
@@ -615,6 +654,7 @@ setMethod("ncol",
#' Returns the dimensions (number of rows and columns) of a DataFrame
#' @param x a SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname dim
#' @name dim
#' @export
@@ -637,6 +677,8 @@ setMethod("dim",
#' @param x A SparkSQL DataFrame
#' @param stringsAsFactors (Optional) A logical indicating whether or not string columns
#' should be converted to factors. FALSE by default.
+#'
+#' @family dataframe_funcs
#' @rdname collect
#' @name collect
#' @export
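A collect sketch, reusing df from above; collect pulls all rows to the driver, so it suits small results only:

    localDF <- collect(df)
    class(localDF)  # "data.frame"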
@@ -704,6 +746,7 @@ setMethod("collect",
#' @param num The number of rows to return
#' @return A new DataFrame containing the number of rows specified.
#'
+#' @family dataframe_funcs
#' @rdname limit
#' @name limit
#' @export
@@ -724,6 +767,7 @@ setMethod("limit",
#' Take the first NUM rows of a DataFrame and return the results as a data.frame
#'
+#' @family dataframe_funcs
#' @rdname take
#' @name take
#' @export
@@ -752,6 +796,7 @@ setMethod("take",
#' @param num The number of rows to return. Default is 6.
#' @return A data.frame
#'
+#' @family dataframe_funcs
#' @rdname head
#' @name head
#' @export
@@ -774,6 +819,7 @@ setMethod("head",
#'
#' @param x A SparkSQL DataFrame
#'
+#' @family dataframe_funcs
#' @rdname first
#' @name first
#' @export
@@ -797,6 +843,7 @@ setMethod("first",
#
# @param x A Spark DataFrame
#
+# @family dataframe_funcs
# @rdname DataFrame
# @export
# @examples
@@ -827,6 +874,7 @@ setMethod("toRDD",
#' @return a GroupedData
#' @seealso GroupedData
#' @aliases group_by
+#' @family dataframe_funcs
#' @rdname groupBy
#' @name groupBy
#' @export
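A groupBy sketch, reusing df from above; n is SparkR's count aggregate and agg operates on the returned GroupedData:

    counts <- agg(groupBy(df, df$waiting), count = n(df$waiting))
    head(counts)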
@@ -851,6 +899,7 @@ setMethod("groupBy",
groupedData(sgd)
})
+#' @family dataframe_funcs
#' @rdname groupBy
#' @name group_by
setMethod("group_by",
@@ -864,6 +913,7 @@ setMethod("group_by",
#' Compute aggregates by specifying a list of columns
#'
#' @param x a DataFrame
+#' @family dataframe_funcs
#' @rdname agg
#' @name agg
#' @aliases summarize
@@ -874,6 +924,7 @@ setMethod("agg",
agg(groupBy(x), ...)
})
+#' @family dataframe_funcs
#' @rdname agg
#' @name summarize
setMethod("summarize",
@@ -889,6 +940,7 @@ setMethod("summarize",
# the requested map function. #
###################################################################################
+# @family dataframe_funcs
# @rdname lapply
setMethod("lapply",
signature(X = "DataFrame", FUN = "function"),
@@ -897,6 +949,7 @@ setMethod("lapply",
lapply(rdd, FUN)
})
+# @family dataframe_funcs
# @rdname lapply
setMethod("map",
signature(X = "DataFrame", FUN = "function"),
@@ -904,6 +957,7 @@ setMethod("map",
lapply(X, FUN)
})
+# @family dataframe_funcs
# @rdname flatMap
setMethod("flatMap",
signature(X = "DataFrame", FUN = "function"),
@@ -911,7 +965,7 @@ setMethod("flatMap",
rdd <- toRDD(X)
flatMap(rdd, FUN)
})
-
+# @family dataframe_funcs
# @rdname lapplyPartition
setMethod("lapplyPartition",
signature(X = "DataFrame", FUN = "function"),
@@ -920,6 +974,7 @@ setMethod("lapplyPartition",
lapplyPartition(rdd, FUN)
})
+# @family dataframe_funcs
# @rdname lapplyPartition
setMethod("mapPartitions",
signature(X = "DataFrame", FUN = "function"),
@@ -927,6 +982,7 @@ setMethod("mapPartitions",
lapplyPartition(X, FUN)
})
+# @family dataframe_funcs
# @rdname foreach
setMethod("foreach",
signature(x = "DataFrame", func = "function"),
@@ -935,6 +991,7 @@ setMethod("foreach",
foreach(rdd, func)
})
+# @family dataframe_funcs
# @rdname foreach
setMethod("foreachPartition",
signature(x = "DataFrame", func = "function"),
@@ -1034,6 +1091,7 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
#' @param select expression for the single Column or a list of columns to select from the DataFrame
#' @return A new DataFrame containing only the rows that meet the condition with selected columns
#' @export
+#' @family dataframe_funcs
#' @rdname subset
#' @name subset
#' @aliases [
@@ -1064,6 +1122,7 @@ setMethod("subset", signature(x = "DataFrame"),
#' @param col A list of columns or single Column or name
#' @return A new DataFrame with selected columns
#' @export
+#' @family dataframe_funcs
#' @rdname select
#' @name select
#' @family subsetting functions
@@ -1091,6 +1150,7 @@ setMethod("select", signature(x = "DataFrame", col = "character"),
}
})
+#' @family dataframe_funcs
#' @rdname select
#' @export
setMethod("select", signature(x = "DataFrame", col = "Column"),
@@ -1102,6 +1162,7 @@ setMethod("select", signature(x = "DataFrame", col = "Column"),
dataFrame(sdf)
})
+#' @family dataframe_funcs
#' @rdname select
#' @export
setMethod("select",
@@ -1126,6 +1187,7 @@ setMethod("select",
#' @param expr A string containing a SQL expression
#' @param ... Additional expressions
#' @return A DataFrame
+#' @family dataframe_funcs
#' @rdname selectExpr
#' @name selectExpr
#' @export
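A selectExpr sketch, reusing df from above; each argument is a SQL expression string:

    selectExpr(df, "eruptions * 60 as eruptions_sec", "waiting")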
@@ -1153,6 +1215,7 @@ setMethod("selectExpr",
#' @param colName A string containing the name of the new column.
#' @param col A Column expression.
#' @return A DataFrame with the new column added.
+#' @family dataframe_funcs
#' @rdname withColumn
#' @name withColumn
#' @aliases mutate transform
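A withColumn sketch, reusing df from above:

    df2 <- withColumn(df, "waiting_hrs", df$waiting / 60)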
@@ -1178,6 +1241,7 @@ setMethod("withColumn",
#' @param .data A DataFrame
#' @param col a named argument of the form name = col
#' @return A new DataFrame with the new columns added.
+#' @family dataframe_funcs
#' @rdname withColumn
#' @name mutate
#' @aliases withColumn transform
@@ -1211,6 +1275,7 @@ setMethod("mutate",
})
#' @export
+#' @family dataframe_funcs
#' @rdname withColumn
#' @name transform
#' @aliases withColumn mutate
@@ -1228,6 +1293,7 @@ setMethod("transform",
#' @param existingCol The name of the column you want to change.
#' @param newCol The new column name.
#' @return A DataFrame with the column name changed.
+#' @family dataframe_funcs
#' @rdname withColumnRenamed
#' @name withColumnRenamed
#' @export
@@ -1259,6 +1325,7 @@ setMethod("withColumnRenamed",
#' @param x A DataFrame
#' @param newCol A named pair of the form new_column_name = existing_column
#' @return A DataFrame with the column name changed.
+#' @family dataframe_funcs
#' @rdname withColumnRenamed
#' @name rename
#' @aliases withColumnRenamed
@@ -1303,6 +1370,7 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' @param decreasing A logical argument indicating sorting order for columns when
#' a character vector is specified for col
#' @return A DataFrame where all elements are sorted.
+#' @family dataframe_funcs
#' @rdname arrange
#' @name arrange
#' @aliases orderby
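An arrange sketch, reusing df from above; desc is SparkR's Column helper for descending order:

    arrange(df, df$waiting)        # ascending
    arrange(df, desc(df$waiting))  # descending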
@@ -1329,6 +1397,7 @@ setMethod("arrange",
dataFrame(sdf)
})
+#' @family dataframe_funcs
#' @rdname arrange
#' @export
setMethod("arrange",
@@ -1360,6 +1429,7 @@ setMethod("arrange",
do.call("arrange", c(x, jcols))
})
+#' @family dataframe_funcs
#' @rdname arrange
#' @name orderby
setMethod("orderBy",
@@ -1376,6 +1446,7 @@ setMethod("orderBy",
#' @param condition The condition to filter on. This may either be a Column expression
#' or a string containing a SQL statement
#' @return A DataFrame containing only the rows that meet the condition.
+#' @family dataframe_funcs
#' @rdname filter
#' @name filter
#' @family subsetting functions
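A filter sketch, reusing df from above; both the Column-expression and SQL-string forms are accepted:

    filter(df, df$waiting > 50)
    filter(df, "waiting > 50")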
@@ -1399,6 +1470,7 @@ setMethod("filter",
dataFrame(sdf)
})
+#' @family dataframe_funcs
#' @rdname filter
#' @name where
setMethod("where",
@@ -1419,6 +1491,7 @@ setMethod("where",
#' 'inner', 'outer', 'full', 'fullouter', 'leftouter', 'left_outer', 'left',
#' 'right_outer', 'rightouter', 'right', and 'leftsemi'. The default joinType is "inner".
#' @return A DataFrame containing the result of the join operation.
+#' @family dataframe_funcs
#' @rdname join
#' @name join
#' @export
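A hedged join sketch; df2 here is a hypothetical second DataFrame that also has a waiting column:

    joined <- join(df, df2, df$waiting == df2$waiting, "left_outer")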
@@ -1477,6 +1550,7 @@ setMethod("join",
#' be returned. If all.x is set to FALSE and all.y is set to TRUE, a right
#' outer join will be returned. If all.x and all.y are set to TRUE, a full
#' outer join will be returned.
+#' @family dataframe_funcs
#' @rdname merge
#' @export
#' @examples
@@ -1608,6 +1682,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
#' @param x A Spark DataFrame
#' @param y A Spark DataFrame
#' @return A DataFrame containing the result of the union.
+#' @family dataframe_funcs
#' @rdname unionAll
#' @name unionAll
#' @export
@@ -1627,9 +1702,10 @@ setMethod("unionAll",
})
#' @title Union two or more DataFrames
-#
+#'
#' @description Returns a new DataFrame containing rows of all parameters.
-#
+#'
+#' @family dataframe_funcs
#' @rdname rbind
#' @name rbind
#' @aliases unionAll
@@ -1651,6 +1727,7 @@ setMethod("rbind",
#' @param x A Spark DataFrame
#' @param y A Spark DataFrame
#' @return A DataFrame containing the result of the intersect.
+#' @family dataframe_funcs
#' @rdname intersect
#' @name intersect
#' @export
@@ -1677,6 +1754,7 @@ setMethod("intersect",
#' @param x A Spark DataFrame
#' @param y A Spark DataFrame
#' @return A DataFrame containing the result of the except operation.
+#' @family dataframe_funcs
#' @rdname except
#' @name except
#' @export
@@ -1716,6 +1794,7 @@ setMethod("except",
#' @param source A name for external data source
#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode
#'
+#' @family dataframe_funcs
#' @rdname write.df
#' @name write.df
#' @aliases saveDF
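A write.df sketch, reusing df from above; the output path is illustrative:

    write.df(df, path = "faithful.parquet", source = "parquet", mode = "overwrite")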
@@ -1751,6 +1830,7 @@ setMethod("write.df",
callJMethod(df@sdf, "save", source, jmode, options)
})
+#' @family dataframe_funcs
#' @rdname write.df
#' @name saveDF
#' @export
@@ -1781,6 +1861,7 @@ setMethod("saveDF",
#' @param source A name for external data source
#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode
#'
+#' @family dataframe_funcs
#' @rdname saveAsTable
#' @name saveAsTable
#' @export
@@ -1821,6 +1902,7 @@ setMethod("saveAsTable",
#' @param col A string naming a column
#' @param ... Additional expressions
#' @return A DataFrame
+#' @family dataframe_funcs
#' @rdname describe
#' @name describe
#' @aliases summary
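A describe sketch, reusing df from above:

    collect(describe(df, "waiting"))  # count, mean, stddev, min, max for the column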
@@ -1843,6 +1925,7 @@ setMethod("describe",
dataFrame(sdf)
})
+#' @family dataframe_funcs
#' @rdname describe
#' @name describe
setMethod("describe",
@@ -1857,6 +1940,7 @@ setMethod("describe",
#'
#' @description Computes statistics for numeric columns of the DataFrame
#'
+#' @family dataframe_funcs
#' @rdname summary
#' @name summary
setMethod("summary",
@@ -1881,6 +1965,7 @@ setMethod("summary",
#' @param cols Optional list of column names to consider.
#' @return A DataFrame
#'
+#' @family dataframe_funcs
#' @rdname nafunctions
#' @name dropna
#' @aliases na.omit
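A dropna sketch, reusing df from above (faithful has no NAs, so these are illustrative):

    dropna(df, how = "any")      # drop rows containing any null
    dropna(df, minNonNulls = 2)  # keep rows with at least 2 non-null values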
@@ -1910,6 +1995,7 @@ setMethod("dropna",
dataFrame(sdf)
})
+#' @family dataframe_funcs
#' @rdname nafunctions
#' @name na.omit
#' @export
@@ -1937,6 +2023,7 @@ setMethod("na.omit",
#' column is simply ignored.
#' @return A DataFrame
#'
+#' @family dataframe_funcs
#' @rdname nafunctions
#' @name fillna
#' @export
@@ -2000,6 +2087,7 @@ setMethod("fillna",
#' @title Download data from a DataFrame into a data.frame
#' @param x a DataFrame
#' @return a data.frame
+#' @family dataframe_funcs
#' @rdname as.data.frame
#' @examples \dontrun{
#'
@@ -2020,6 +2108,7 @@ setMethod("as.data.frame",
#' the DataFrame is searched by R when evaluating a variable, so columns in
#' the DataFrame can be accessed by simply giving their names.
#'
+#' @family dataframe_funcs
#' @rdname attach
#' @title Attach DataFrame to R search path
#' @param what (DataFrame) The DataFrame to attach