aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorNarine Kokhlikyan <narine.kokhlikyan@gmail.com>2015-10-13 10:09:05 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2015-10-13 10:09:05 -0700
commit1e0aba90b9e73834af70d196f7f869b062d98d94 (patch)
tree0913e8561478ec706e06eaa9af5a357c61adb3e1 /R
parent5e3868ba139f5f0b3a33361c6b884594a3ab6421 (diff)
downloadspark-1e0aba90b9e73834af70d196f7f869b062d98d94.tar.gz
spark-1e0aba90b9e73834af70d196f7f869b062d98d94.tar.bz2
spark-1e0aba90b9e73834af70d196f7f869b062d98d94.zip
[SPARK-10888] [SPARKR] Added as.DataFrame as a synonym to createDataFrame
as.DataFrame is a more R-style signature. Also, I'd like to know if we could make the context (e.g. sqlContext) global, so that we do not have to specify it as an argument each time we create a DataFrame. Author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com> Closes #8952 from NarineK/sparkrasDataFrame.
Diffstat (limited to 'R')
-rw-r--r--R/pkg/NAMESPACE3
-rw-r--r--R/pkg/R/SQLContext.R17
-rw-r--r--R/pkg/inst/tests/test_sparkSQL.R15
3 files changed, 30 insertions, 5 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 95d949ee3e..41986a5e7a 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -228,7 +228,8 @@ exportMethods("agg")
export("sparkRSQL.init",
"sparkRHive.init")
-export("cacheTable",
+export("as.DataFrame",
+ "cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 66c7e30721..399f53657a 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -64,21 +64,23 @@ infer_type <- function(x) {
}
}
-#' Create a DataFrame from an RDD
+#' Create a DataFrame
#'
-#' Converts an RDD to a DataFrame by infer the types.
+#' Converts R data.frame or list into DataFrame.
#'
#' @param sqlContext A SQLContext
#' @param data An RDD or list or data.frame
#' @param schema a list of column names or named list (StructType), optional
#' @return an DataFrame
+#' @rdname createDataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- createDataFrame(sqlContext, rdd)
+#' df1 <- as.DataFrame(sqlContext, iris)
+#' df2 <- as.DataFrame(sqlContext, list(3,4,5,6))
+#' df3 <- createDataFrame(sqlContext, iris)
#' }
# TODO(davies): support sampling and infer type from NA
@@ -151,6 +153,13 @@ createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0
dataFrame(sdf)
}
+#' @rdname createDataFrame
+#' @aliases createDataFrame
+#' @export
+as.DataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
+ createDataFrame(sqlContext, data, schema, samplingRatio)
+}
+
# toDF
#
# Converts an RDD to a DataFrame by infer the types.
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index af6efa40fb..b599994854 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -89,17 +89,28 @@ test_that("structType and structField", {
test_that("create DataFrame from RDD", {
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+ dfAsDF <- as.DataFrame(sqlContext, rdd, list("a", "b"))
expect_is(df, "DataFrame")
+ expect_is(dfAsDF, "DataFrame")
expect_equal(count(df), 10)
+ expect_equal(count(dfAsDF), 10)
expect_equal(nrow(df), 10)
+ expect_equal(nrow(dfAsDF), 10)
expect_equal(ncol(df), 2)
+ expect_equal(ncol(dfAsDF), 2)
expect_equal(dim(df), c(10, 2))
+ expect_equal(dim(dfAsDF), c(10, 2))
expect_equal(columns(df), c("a", "b"))
+ expect_equal(columns(dfAsDF), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+ expect_equal(dtypes(dfAsDF), list(c("a", "int"), c("b", "string")))
df <- createDataFrame(sqlContext, rdd)
+ dfAsDF <- as.DataFrame(sqlContext, rdd)
expect_is(df, "DataFrame")
+ expect_is(dfAsDF, "DataFrame")
expect_equal(columns(df), c("_1", "_2"))
+ expect_equal(columns(dfAsDF), c("_1", "_2"))
schema <- structType(structField(x = "a", type = "integer", nullable = TRUE),
structField(x = "b", type = "string", nullable = TRUE))
@@ -130,9 +141,13 @@ test_that("create DataFrame from RDD", {
schema <- structType(structField("name", "string"), structField("age", "integer"),
structField("height", "float"))
df2 <- createDataFrame(sqlContext, df.toRDD, schema)
+ df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
expect_equal(columns(df2), c("name", "age", "height"))
+ expect_equal(columns(df2AsDF), c("name", "age", "height"))
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
+ expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
+ expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5))
localDF <- data.frame(name=c("John", "Smith", "Sarah"),
age=c(19, 23, 18),