author    Shivaram Venkataraman <shivaram@cs.berkeley.edu>  2015-05-29 14:11:58 -0700
committer Davies Liu <davies@databricks.com>  2015-05-29 14:11:58 -0700
commit    5f48e5c33bafa376be5741e260a037c66103fdcd (patch)
tree      7517dc75467eb80a439dbf87573aeff572289d12 /R
parent    9eb222c13991c2b4a22db485710dc2e27ccf06dd (diff)
[SPARK-6806] [SPARKR] [DOCS] Add a new SparkR programming guide
This PR adds a new SparkR programming guide at the top level. This will be useful for R users, since our APIs don't directly match the Scala/Python APIs and we need to explain SparkR without using RDDs as examples, etc.

cc rxin davies pwendell
cc cafreeman -- Would be great if you could also take a look at this!

Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>

Closes #6490 from shivaram/sparkr-guide and squashes the following commits:

d5ff360 [Shivaram Venkataraman] Add a section on HiveContext, HQL queries
408dce5 [Shivaram Venkataraman] Fix link
dbb86e3 [Shivaram Venkataraman] Fix minor typo
9aff5e0 [Shivaram Venkataraman] Address comments, use dplyr-like syntax in example
d09703c [Shivaram Venkataraman] Fix default argument in read.df
ea816a1 [Shivaram Venkataraman] Add a new SparkR programming guide. Also update write.df, read.df to handle defaults better.
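As a sketch of the user-facing effect of the write.df/read.df changes bundled here (a hedged example assuming a SparkR 1.4-era session; the setup and file paths are hypothetical):

    library(SparkR)
    sc <- sparkR.init(master = "local")
    sqlContext <- sparkRSQL.init(sc)

    # source now defaults to spark.sql.sources.default (parquet out of the box)
    df <- read.df(sqlContext, "people.parquet")

    # mode defaults to "append"; source and mode can still be given explicitly
    write.df(df, "people-copy.parquet")
    write.df(df, "people.json", source = "json", mode = "overwrite")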
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/R/DataFrame.R   10
-rw-r--r--  R/pkg/R/SQLContext.R   5
-rw-r--r--  R/pkg/R/generics.R     4
3 files changed, 11 insertions, 8 deletions
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index ed8093c80d..e79d324838 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1314,9 +1314,8 @@ setMethod("except",
#' write.df(df, "myfile", "parquet", "overwrite")
#' }
setMethod("write.df",
- signature(df = "DataFrame", path = 'character', source = 'character',
- mode = 'character'),
- function(df, path = NULL, source = NULL, mode = "append", ...){
+ signature(df = "DataFrame", path = 'character'),
+ function(df, path, source = NULL, mode = "append", ...){
if (is.null(source)) {
sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
@@ -1338,9 +1337,8 @@ setMethod("write.df",
#' @aliases saveDF
#' @export
setMethod("saveDF",
- signature(df = "DataFrame", path = 'character', source = 'character',
- mode = 'character'),
- function(df, path = NULL, source = NULL, mode = "append", ...){
+ signature(df = "DataFrame", path = 'character'),
+ function(df, path, source = NULL, mode = "append", ...){
write.df(df, path, source, mode, ...)
})
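The is.null(source) branch above is what supplies the default data source. A standalone sketch of the same fallback (getDefaultSource is a hypothetical helper; callJMethod and .sparkREnv are the SparkR internals visible in the diff):

    getDefaultSource <- function() {
      # Look up the cached JVM-side SQLContext, then ask it for
      # spark.sql.sources.default, falling back to the built-in parquet source.
      sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
      callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
                  "org.apache.spark.sql.parquet")
    }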
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 36cc612875..88e1a508f3 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -457,6 +457,11 @@ read.df <- function(sqlContext, path = NULL, source = NULL, ...) {
if (!is.null(path)) {
options[['path']] <- path
}
+ if (is.null(source)) {
+ sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
+ source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
+ "org.apache.spark.sql.parquet")
+ }
sdf <- callJMethod(sqlContext, "load", source, options)
dataFrame(sdf)
}
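With the same fallback added to read.df, the source argument becomes optional for the default format (a hedged example; the file paths are hypothetical):

    # Format resolved from spark.sql.sources.default, i.e. parquet by default
    df <- read.df(sqlContext, "examples/people.parquet")

    # Non-default formats still take an explicit source
    people <- read.df(sqlContext, "examples/people.json", source = "json")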
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index a23d3b217b..1f4fc6adac 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -482,11 +482,11 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) {
#' @rdname write.df
#' @export
-setGeneric("write.df", function(df, path, source, mode, ...) { standardGeneric("write.df") })
+setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
#' @rdname write.df
#' @export
-setGeneric("saveDF", function(df, path, source, mode, ...) { standardGeneric("saveDF") })
+setGeneric("saveDF", function(df, path, ...) { standardGeneric("saveDF") })
#' @rdname schema
#' @export
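The generics change follows a general S4 pattern: dispatch only on the arguments that must always be present, and let methods add defaulted arguments through the dots. A minimal self-contained sketch with a hypothetical generic persist2 (not part of SparkR):

    # The generic lists only the required, dispatched-on arguments;
    # the method adds source/mode-style defaults via ... .
    setGeneric("persist2", function(obj, path, ...) { standardGeneric("persist2") })

    setMethod("persist2", signature(obj = "ANY", path = "character"),
              function(obj, path, format = "parquet", ...) {
                message("writing ", path, " as ", format)
              })

    persist2(list(1, 2), "out")                  # format defaults to "parquet"
    persist2(list(1, 2), "out", format = "json") # optional argument still settable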