author     Felix Cheung <felixcheung_m@hotmail.com>          2016-06-20 13:46:24 -0700
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>  2016-06-20 13:46:24 -0700
commit     359c2e827d5682249c009e83379a5ee8e5aa4e89 (patch)
tree       1aac6f407a2fac66b44bd4e03672f58d311ee759 /examples/src/main/r
parent     b0f2fb5b9729b38744bf784f2072f5ee52314f87 (diff)
[SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates
## What changes were proposed in this pull request?

roxygen2 doc, programming guide, example updates

## How was this patch tested?

manual checks

shivaram

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #13751 from felixcheung/rsparksessiondoc.
Diffstat (limited to 'examples/src/main/r')
-rw-r--r--  examples/src/main/r/data-manipulation.R | 15
-rw-r--r--  examples/src/main/r/dataframe.R          | 13
-rw-r--r--  examples/src/main/r/ml.R                 | 21
3 files changed, 22 insertions, 27 deletions
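The common thread across all three files is the move from the Spark 1.x entry points (a SparkContext plus a SQLContext) to the single SparkSession entry point. A minimal sketch of the two flows side by side, assuming a local Spark 2.0+ install with SparkR on the library path ("SparkR-sketch" is an illustrative app name, not from this patch):

    library(SparkR)

    # Old 1.x flow, replaced by this patch:
    #   sc <- sparkR.init(appName = "SparkR-sketch")   # SparkContext
    #   sqlContext <- sparkRSQL.init(sc)               # SQLContext
    #   df <- createDataFrame(sqlContext, faithful)
    #   sparkR.stop()

    # New unified flow used throughout the updated examples:
    sparkR.session(appName = "SparkR-sketch")  # creates or reuses the session
    df <- createDataFrame(faithful)            # no sqlContext argument needed
    head(df)
    sparkR.session.stop()                      # replaces sparkR.stop()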
diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R
index badb98bc78..371335a62e 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -17,7 +17,7 @@
# For this example, we shall use the "flights" dataset
# The dataset consists of every flight departing Houston in 2011.
-# The data set is made up of 227,496 rows x 14 columns.
+# The data set is made up of 227,496 rows x 14 columns.
# To run this example use
# ./bin/spark-submit examples/src/main/r/data-manipulation.R <path_to_csv>
@@ -33,11 +33,8 @@ if (length(args) != 1) {
q("no")
}
-## Initialize SparkContext
-sc <- sparkR.init(appName = "SparkR-data-manipulation-example")
-
-## Initialize SQLContext
-sqlContext <- sparkRSQL.init(sc)
+## Initialize SparkSession
+sparkR.session(appName = "SparkR-data-manipulation-example")
flightsCsvPath <- args[[1]]
@@ -46,7 +43,7 @@ flights_df <- read.csv(flightsCsvPath, header = TRUE)
flights_df$date <- as.Date(flights_df$date)
## Filter flights whose destination is San Francisco and write to a local data frame
-SFO_df <- flights_df[flights_df$dest == "SFO", ]
+SFO_df <- flights_df[flights_df$dest == "SFO", ]
# Convert the local data frame into a SparkDataFrame
SFO_DF <- createDataFrame(SFO_df)
@@ -102,5 +99,5 @@ if("magrittr" %in% rownames(installed.packages())) {
head(dailyDelayDF)
}
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
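As the hunks above show, configuration that previously went to sparkR.init is now passed to sparkR.session. A hedged sketch of supplying Spark properties at session start; the sparkConfig parameter and the property shown are assumptions based on the SparkR 2.0 API, not part of this patch:

    library(SparkR)
    # Start the session with an app name and, optionally, Spark properties
    # (sparkConfig and spark.driver.memory are assumed here for illustration).
    sparkR.session(appName = "SparkR-data-manipulation-example",
                   sparkConfig = list(spark.driver.memory = "2g"))
    # ... run the example ...
    sparkR.session.stop()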
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 0434705717..a377d6e864 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -17,9 +17,8 @@
library(SparkR)
-# Initialize SparkContext and SQLContext
-sc <- sparkR.init(appName="SparkR-DataFrame-example")
-sqlContext <- sparkRSQL.init(sc)
+# Initialize SparkSession
+sc <- sparkR.session(appName="SparkR-DataFrame-example")
# Create a simple local data.frame
localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
@@ -44,14 +43,14 @@ printSchema(peopleDF)
# Register this DataFrame as a table.
createOrReplaceTempView(peopleDF, "people")
-# SQL statements can be run by using the sql methods provided by sqlContext
+# SQL statements can be run by using the sql methods
teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
# Call collect to get a local data.frame
teenagersLocalDF <- collect(teenagers)
-# Print the teenagers in our dataset
+# Print the teenagers in our dataset
print(teenagersLocalDF)
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
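With a SparkSession active, sql() no longer takes an explicit sqlContext handle; queries run against the current session. A self-contained sketch of the temp-view-plus-SQL pattern from this file, using only calls that appear in the diff ("sql-sketch" is an illustrative app name):

    library(SparkR)
    sparkR.session(appName = "sql-sketch")

    # Register a SparkDataFrame as a temporary view, then query it with SQL.
    peopleDF <- createDataFrame(data.frame(name = c("John", "Smith", "Sarah"),
                                           age = c(19, 23, 18)))
    createOrReplaceTempView(peopleDF, "people")
    teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
    print(collect(teenagers))  # collect() brings results back as a local data.frame

    sparkR.session.stop()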
diff --git a/examples/src/main/r/ml.R b/examples/src/main/r/ml.R
index 495f392c26..940c98dcb9 100644
--- a/examples/src/main/r/ml.R
+++ b/examples/src/main/r/ml.R
@@ -21,14 +21,13 @@
# Load SparkR library into your R session
library(SparkR)
-# Initialize SparkContext and SQLContext
-sc <- sparkR.init(appName="SparkR-ML-example")
-sqlContext <- sparkRSQL.init(sc)
+# Initialize SparkSession
+sparkR.session(appName="SparkR-ML-example")
# $example on$
############################ spark.glm and glm ##############################################
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
# Fit a generalized linear model of family "gaussian" with spark.glm
gaussianDF <- irisDF
gaussianTestDF <- irisDF
@@ -62,7 +61,7 @@ showDF(binomialPredictions)
library(survival)
# Fit an accelerated failure time (AFT) survival regression model with spark.survreg
-ovarianDF <- suppressWarnings(createDataFrame(sqlContext, ovarian))
+ovarianDF <- suppressWarnings(createDataFrame(ovarian))
aftDF <- ovarianDF
aftTestDF <- ovarianDF
aftModel <- spark.survreg(aftDF, Surv(futime, fustat) ~ ecog_ps + rx)
@@ -78,7 +77,7 @@ showDF(aftPredictions)
# Fit a Bernoulli naive Bayes model with spark.naiveBayes
titanic <- as.data.frame(Titanic)
-titanicDF <- suppressWarnings(createDataFrame(sqlContext, titanic[titanic$Freq > 0, -5]))
+titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5])
nbDF <- titanicDF
nbTestDF <- titanicDF
nbModel <- spark.naiveBayes(nbDF, Survived ~ Class + Sex + Age)
@@ -93,7 +92,7 @@ showDF(nbPredictions)
############################ spark.kmeans ##############################################
# Fit a k-means model with spark.kmeans
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
kmeansDF <- irisDF
kmeansTestDF <- irisDF
kmeansModel <- spark.kmeans(kmeansDF, ~ Sepal_Length + Sepal_Width + Petal_Length + Petal_Width,
@@ -111,7 +110,7 @@ showDF(kmeansPredictions)
############################ model read/write ##############################################
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
# Fit a generalized linear model of family "gaussian" with spark.glm
gaussianDF <- irisDF
gaussianTestDF <- irisDF
@@ -139,11 +138,11 @@ train <- function(family) {
model <- glm(Sepal.Length ~ Sepal.Width + Species, iris, family = family)
summary(model)
}
-model.summaries <- spark.lapply(sc, families, train)
+model.summaries <- spark.lapply(families, train)
# Print the summary of each model
print(model.summaries)
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
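The same pattern extends to spark.lapply: with a session active, it no longer takes the sc handle as its first argument. A minimal sketch of the updated call, mirroring the train() example above ("lapply-sketch" is an illustrative app name):

    library(SparkR)
    sparkR.session(appName = "lapply-sketch")

    # Fit one local R glm per family, distributed across the cluster.
    families <- c("gaussian", "poisson")
    train <- function(family) {
      model <- glm(Sepal.Length ~ Sepal.Width + Species, iris, family = family)
      summary(model)
    }
    model.summaries <- spark.lapply(families, train)  # no sc argument
    print(model.summaries)

    sparkR.session.stop()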