author    felixcheung <felixcheung_m@hotmail.com>          2015-10-23 21:42:00 -0700
committer Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2015-10-23 21:42:00 -0700
commit    5e458125018029cef5cde3390f4a55dd4e164fde (patch)
tree      c1b69602f448eb505b5d7303fae130abd194cbcb /R
parent    2462dbcce89d657bca17ae311c99c2a4bee4a5fa (diff)
[SPARK-11294][SPARKR] Improve R doc for read.df, write.df, saveAsTable
Add examples for read.df, write.df; fix grouping for read.df, loadDF; fix formatting and text truncation for write.df, saveAsTable.

Several text issues:
![image](https://cloud.githubusercontent.com/assets/8969467/10708590/1303a44e-79c3-11e5-854f-3a2e16854cd7.png)
- text collapsed into a single paragraph
- text truncated at 2 places, e.g. "overwrite: Existing data is expected to be overwritten by the contents of error:"

shivaram

Author: felixcheung <felixcheung_m@hotmail.com>

Closes #9261 from felixcheung/rdocreadwritedf.
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/R/DataFrame.R  | 27
-rw-r--r--  R/pkg/R/SQLContext.R | 16
2 files changed, 24 insertions(+), 19 deletions(-)
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 993be82a47..2acbd081cd 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1572,18 +1572,17 @@ setMethod("except",
#' spark.sql.sources.default will be used.
#'
#' Additionally, mode is used to specify the behavior of the save operation when
-#' data already exists in the data source. There are four modes:
-#' append: Contents of this DataFrame are expected to be appended to existing data.
-#' overwrite: Existing data is expected to be overwritten by the contents of
-# this DataFrame.
-#' error: An exception is expected to be thrown.
+#' data already exists in the data source. There are four modes: \cr
+#' append: Contents of this DataFrame are expected to be appended to existing data. \cr
+#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr
+#' error: An exception is expected to be thrown. \cr
#' ignore: The save operation is expected to not save the contents of the DataFrame
-# and to not change the existing data.
+#' and to not change the existing data. \cr
#'
#' @param df A SparkSQL DataFrame
#' @param path A name for the table
#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
+#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode
#'
#' @rdname write.df
#' @name write.df
@@ -1596,6 +1595,7 @@ setMethod("except",
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' write.df(df, "myfile", "parquet", "overwrite")
+#' saveDF(df, parquetPath2, "parquet", mode = saveMode, mergeSchema = mergeSchema)
#' }
setMethod("write.df",
signature(df = "DataFrame", path = "character"),
@@ -1637,18 +1637,17 @@ setMethod("saveDF",
#' spark.sql.sources.default will be used.
#'
#' Additionally, mode is used to specify the behavior of the save operation when
-#' data already exists in the data source. There are four modes:
-#' append: Contents of this DataFrame are expected to be appended to existing data.
-#' overwrite: Existing data is expected to be overwritten by the contents of
-# this DataFrame.
-#' error: An exception is expected to be thrown.
+#' data already exists in the data source. There are four modes: \cr
+#' append: Contents of this DataFrame are expected to be appended to existing data. \cr
+#' overwrite: Existing data is expected to be overwritten by the contents of this DataFrame. \cr
+#' error: An exception is expected to be thrown. \cr
#' ignore: The save operation is expected to not save the contents of the DataFrame
-# and to not change the existing data.
+#' and to not change the existing data. \cr
#'
#' @param df A SparkSQL DataFrame
#' @param tableName A name for the table
#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
+#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode
#'
#' @rdname saveAsTable
#' @name saveAsTable
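
For orientation, a minimal sketch of how the four save modes documented above behave, assuming a running SparkR 1.x session built the same way as in the patched examples; the paths and table name here are hypothetical placeholders:

# Hypothetical SparkR 1.x session; df is any DataFrame, e.g. from jsonFile().
sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)
df <- jsonFile(sqlContext, "path/to/file.json")

# "error" throws an exception if data already exists at the path.
write.df(df, "myfile", "parquet", "error")

# "overwrite" replaces any existing data with the contents of df.
write.df(df, "myfile", "parquet", "overwrite")

# "append" adds the contents of df to the existing data.
write.df(df, "myfile", "parquet", "append")

# "ignore" skips the save entirely when data already exists.
write.df(df, "myfile", "parquet", "ignore")

# saveAsTable takes a table name rather than a path, same mode argument.
saveAsTable(df, "mytable", "parquet", "overwrite")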
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 399f53657a..1bf025cce4 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -452,14 +452,21 @@ dropTempTable <- function(sqlContext, tableName) {
#'
#' @param sqlContext SQLContext to use
#' @param path The path of files to load
-#' @param source the name of external data source
+#' @param source The name of external data source
+#' @param schema The data schema defined in structType
#' @return DataFrame
+#' @rdname read.df
+#' @name read.df
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
-#' df <- read.df(sqlContext, "path/to/file.json", source = "json")
+#' df1 <- read.df(sqlContext, "path/to/file.json", source = "json")
+#' schema <- structType(structField("name", "string"),
+#' structField("info", "map<string,double>"))
+#' df2 <- read.df(sqlContext, mapTypeJsonPath, "json", schema)
+#' df3 <- loadDF(sqlContext, "data/test_table", "parquet", mergeSchema = "true")
#' }
read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
@@ -482,9 +489,8 @@ read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...)
dataFrame(sdf)
}
-#' @aliases loadDF
-#' @export
-
+#' @rdname read.df
+#' @name loadDF
loadDF <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
read.df(sqlContext, path, source, schema, ...)
}
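
For orientation, a minimal sketch of the read.df/loadDF usage that the regrouped @rdname read.df page now documents, assuming the same hypothetical SparkR 1.x session as above; all paths are placeholders:

# Without a schema, read.df infers one from the source.
people <- read.df(sqlContext, "path/to/people.json", source = "json")

# With an explicit schema built from structType/structField,
# inference is skipped and the declared column types are used.
schema <- structType(structField("name", "string"),
                     structField("age", "double"))
people2 <- read.df(sqlContext, "path/to/people.json", "json", schema)

# loadDF simply forwards to read.df, so both entry points
# now share one documentation page.
people3 <- loadDF(sqlContext, "path/to/people.parquet", "parquet")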