diff options
author | felixcheung <felixcheung_m@hotmail.com> | 2015-10-23 21:42:00 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-10-23 21:42:00 -0700 |
commit | 5e458125018029cef5cde3390f4a55dd4e164fde (patch) | |
tree | c1b69602f448eb505b5d7303fae130abd194cbcb /R | |
parent | 2462dbcce89d657bca17ae311c99c2a4bee4a5fa (diff) | |
download | spark-5e458125018029cef5cde3390f4a55dd4e164fde.tar.gz spark-5e458125018029cef5cde3390f4a55dd4e164fde.tar.bz2 spark-5e458125018029cef5cde3390f4a55dd4e164fde.zip |
[SPARK-11294][SPARKR] Improve R doc for read.df, write.df, saveAsTable
Add examples for read.df, write.df; fix grouping for read.df, loadDF; fix formatting and text truncation for write.df, saveAsTable.
Several text issues in the rendered documentation (see screenshot):
![image](https://cloud.githubusercontent.com/assets/8969467/10708590/1303a44e-79c3-11e5-854f-3a2e16854cd7.png)
- text collapsed into a single paragraph
- text truncated at two places, e.g. "overwrite: Existing data is expected to be overwritten by the contents of error:"
shivaram
Author: felixcheung <felixcheung_m@hotmail.com>
Closes #9261 from felixcheung/rdocreadwritedf.
Diffstat (limited to 'R')
-rw-r--r-- | R/pkg/R/DataFrame.R | 27 | ||||
-rw-r--r-- | R/pkg/R/SQLContext.R | 16 |
2 files changed, 24 insertions, 19 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 993be82a47..2acbd081cd 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1572,18 +1572,17 @@ setMethod("except", #' spark.sql.sources.default will be used. #' #' Additionally, mode is used to specify the behavior of the save operation when -#' data already exists in the data source. There are four modes: -#' append: Contents of this DataFrame are expected to be appended to existing data. -#' overwrite: Existing data is expected to be overwritten by the contents of -# this DataFrame. -#' error: An exception is expected to be thrown. +#' data already exists in the data source. There are four modes: \cr +#' append: Contents of this DataFrame are expected to be appended to existing data. \cr +#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr +#' error: An exception is expected to be thrown. \cr #' ignore: The save operation is expected to not save the contents of the DataFrame -# and to not change the existing data. +#' and to not change the existing data. \cr #' #' @param df A SparkSQL DataFrame #' @param path A name for the table #' @param source A name for external data source -#' @param mode One of 'append', 'overwrite', 'error', 'ignore' +#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode #' #' @rdname write.df #' @name write.df @@ -1596,6 +1595,7 @@ setMethod("except", #' path <- "path/to/file.json" #' df <- jsonFile(sqlContext, path) #' write.df(df, "myfile", "parquet", "overwrite") +#' saveDF(df, parquetPath2, "parquet", mode = saveMode, mergeSchema = mergeSchema) #' } setMethod("write.df", signature(df = "DataFrame", path = "character"), @@ -1637,18 +1637,17 @@ setMethod("saveDF", #' spark.sql.sources.default will be used. #' #' Additionally, mode is used to specify the behavior of the save operation when -#' data already exists in the data source. 
There are four modes: -#' append: Contents of this DataFrame are expected to be appended to existing data. -#' overwrite: Existing data is expected to be overwritten by the contents of -# this DataFrame. -#' error: An exception is expected to be thrown. +#' data already exists in the data source. There are four modes: \cr +#' append: Contents of this DataFrame are expected to be appended to existing data. \cr +#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr +#' error: An exception is expected to be thrown. \cr #' ignore: The save operation is expected to not save the contents of the DataFrame -# and to not change the existing data. +#' and to not change the existing data. \cr #' #' @param df A SparkSQL DataFrame #' @param tableName A name for the table #' @param source A name for external data source -#' @param mode One of 'append', 'overwrite', 'error', 'ignore' +#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode #' #' @rdname saveAsTable #' @name saveAsTable diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 399f53657a..1bf025cce4 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -452,14 +452,21 @@ dropTempTable <- function(sqlContext, tableName) { #' #' @param sqlContext SQLContext to use #' @param path The path of files to load -#' @param source the name of external data source +#' @param source The name of external data source +#' @param schema The data schema defined in structType #' @return DataFrame +#' @rdname read.df +#' @name read.df #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df <- read.df(sqlContext, "path/to/file.json", source = "json") +#' df1 <- read.df(sqlContext, "path/to/file.json", source = "json") +#' schema <- structType(structField("name", "string"), +#' structField("info", "map<string,double>")) +#' df2 <- read.df(sqlContext, mapTypeJsonPath, "json", schema) +#' df3 <- loadDF(sqlContext, 
"data/test_table", "parquet", mergeSchema = "true") #' } read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { @@ -482,9 +489,8 @@ read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) dataFrame(sdf) } -#' @aliases loadDF -#' @export - +#' @rdname read.df +#' @name loadDF loadDF <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { read.df(sqlContext, path, source, schema, ...) } |