aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
Diffstat (limited to 'R')
-rw-r--r--R/pkg/NAMESPACE3
-rw-r--r--R/pkg/R/SQLContext.R17
-rw-r--r--R/pkg/inst/tests/test_sparkSQL.R15
3 files changed, 30 insertions, 5 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 95d949ee3e..41986a5e7a 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -228,7 +228,8 @@ exportMethods("agg")
export("sparkRSQL.init",
"sparkRHive.init")
-export("cacheTable",
+export("as.DataFrame",
+ "cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 66c7e30721..399f53657a 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -64,21 +64,23 @@ infer_type <- function(x) {
}
}
-#' Create a DataFrame from an RDD
+#' Create a DataFrame
#'
-#' Converts an RDD to a DataFrame by infer the types.
+#' Converts an R data.frame or list into a DataFrame.
#'
#' @param sqlContext A SQLContext
#' @param data An RDD or list or data.frame
#' @param schema a list of column names or named list (StructType), optional
#' @return a DataFrame
+#' @rdname createDataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- createDataFrame(sqlContext, rdd)
+#' df1 <- as.DataFrame(sqlContext, iris)
+#' df2 <- as.DataFrame(sqlContext, list(3,4,5,6))
+#' df3 <- createDataFrame(sqlContext, iris)
#' }
# TODO(davies): support sampling and infer type from NA
@@ -151,6 +153,13 @@ createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0
dataFrame(sdf)
}
+#' @rdname createDataFrame
+#' @aliases createDataFrame
+#' @export
+as.DataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
+ createDataFrame(sqlContext, data, schema = schema, samplingRatio = samplingRatio)
+}
+
# toDF
#
# Converts an RDD to a DataFrame by inferring the types.
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index af6efa40fb..b599994854 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -89,17 +89,28 @@ test_that("structType and structField", {
test_that("create DataFrame from RDD", {
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+ dfAsDF <- as.DataFrame(sqlContext, rdd, list("a", "b"))
expect_is(df, "DataFrame")
+ expect_is(dfAsDF, "DataFrame")
expect_equal(count(df), 10)
+ expect_equal(count(dfAsDF), 10)
expect_equal(nrow(df), 10)
+ expect_equal(nrow(dfAsDF), 10)
expect_equal(ncol(df), 2)
+ expect_equal(ncol(dfAsDF), 2)
expect_equal(dim(df), c(10, 2))
+ expect_equal(dim(dfAsDF), c(10, 2))
expect_equal(columns(df), c("a", "b"))
+ expect_equal(columns(dfAsDF), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+ expect_equal(dtypes(dfAsDF), list(c("a", "int"), c("b", "string")))
df <- createDataFrame(sqlContext, rdd)
+ dfAsDF <- as.DataFrame(sqlContext, rdd)
expect_is(df, "DataFrame")
+ expect_is(dfAsDF, "DataFrame")
expect_equal(columns(df), c("_1", "_2"))
+ expect_equal(columns(dfAsDF), c("_1", "_2"))
schema <- structType(structField(x = "a", type = "integer", nullable = TRUE),
structField(x = "b", type = "string", nullable = TRUE))
@@ -130,9 +141,13 @@ test_that("create DataFrame from RDD", {
schema <- structType(structField("name", "string"), structField("age", "integer"),
structField("height", "float"))
df2 <- createDataFrame(sqlContext, df.toRDD, schema)
+ df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
expect_equal(columns(df2), c("name", "age", "height"))
+ expect_equal(columns(df2AsDF), c("name", "age", "height"))
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
+ expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
+ expect_equal(collect(where(df2AsDF, df2AsDF$name == "Bob")), c("Bob", 16, 176.5))
localDF <- data.frame(name=c("John", "Smith", "Sarah"),
age=c(19, 23, 18),