author     Felix Cheung <felixcheung_m@hotmail.com>          2016-06-20 13:46:24 -0700
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>  2016-06-20 13:46:24 -0700
commit     359c2e827d5682249c009e83379a5ee8e5aa4e89 (patch)
tree       1aac6f407a2fac66b44bd4e03672f58d311ee759 /examples/src/main/r
parent     b0f2fb5b9729b38744bf784f2072f5ee52314f87 (diff)
[SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates
## What changes were proposed in this pull request?

roxygen2 doc, programming guide, example updates

## How was this patch tested?

manual checks

shivaram

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #13751 from felixcheung/rsparksessiondoc.
Diffstat (limited to 'examples/src/main/r')
-rw-r--r--  examples/src/main/r/data-manipulation.R | 15
-rw-r--r--  examples/src/main/r/dataframe.R          | 13
-rw-r--r--  examples/src/main/r/ml.R                 | 21
3 files changed, 22 insertions, 27 deletions
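The common thread across all three files is the move from the Spark 1.x entry points (a SparkContext plus a SQLContext) to the single SparkSession entry point. A minimal sketch of the two flows side by side, assuming a local Spark 2.0+ install with SparkR on the library path ("SparkR-sketch" is an illustrative app name, not from this patch):

    library(SparkR)

    # Old 1.x flow, replaced by this patch:
    #   sc <- sparkR.init(appName = "SparkR-sketch")   # SparkContext
    #   sqlContext <- sparkRSQL.init(sc)               # SQLContext
    #   df <- createDataFrame(sqlContext, faithful)
    #   sparkR.stop()

    # New unified flow used throughout the updated examples:
    sparkR.session(appName = "SparkR-sketch")  # creates or reuses the session
    df <- createDataFrame(faithful)            # no sqlContext argument needed
    head(df)
    sparkR.session.stop()                      # replaces sparkR.stop()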
diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R
index badb98bc78..371335a62e 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -17,7 +17,7 @@
# For this example, we shall use the "flights" dataset
# The dataset consists of every flight departing Houston in 2011.
-# The data set is made up of 227,496 rows x 14 columns.
+# The data set is made up of 227,496 rows x 14 columns.
# To run this example use
# ./bin/spark-submit examples/src/main/r/data-manipulation.R <path_to_csv>
@@ -33,11 +33,8 @@ if (length(args) != 1) {
q("no")
}
-## Initialize SparkContext
-sc <- sparkR.init(appName = "SparkR-data-manipulation-example")
-
-## Initialize SQLContext
-sqlContext <- sparkRSQL.init(sc)
+## Initialize SparkSession
+sparkR.session(appName = "SparkR-data-manipulation-example")
flightsCsvPath <- args[[1]]
@@ -46,7 +43,7 @@ flights_df <- read.csv(flightsCsvPath, header = TRUE)
flights_df$date <- as.Date(flights_df$date)
## Filter flights whose destination is San Francisco and write to a local data frame
-SFO_df <- flights_df[flights_df$dest == "SFO", ]
+SFO_df <- flights_df[flights_df$dest == "SFO", ]
# Convert the local data frame into a SparkDataFrame
SFO_DF <- createDataFrame(SFO_df)
@@ -102,5 +99,5 @@ if("magrittr" %in% rownames(installed.packages())) {
head(dailyDelayDF)
}
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
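As the hunks above show, configuration that previously went to sparkR.init is now passed to sparkR.session. A hedged sketch of supplying Spark properties at session start; the sparkConfig parameter and the property shown are assumptions based on the SparkR 2.0 API, not part of this patch:

    library(SparkR)
    # Start the session with an app name and, optionally, Spark properties
    # (sparkConfig and spark.driver.memory are assumed here for illustration).
    sparkR.session(appName = "SparkR-data-manipulation-example",
                   sparkConfig = list(spark.driver.memory = "2g"))
    # ... run the example ...
    sparkR.session.stop()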
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 0434705717..a377d6e864 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -17,9 +17,8 @@
library(SparkR)
-# Initialize SparkContext and SQLContext
-sc <- sparkR.init(appName="SparkR-DataFrame-example")
-sqlContext <- sparkRSQL.init(sc)
+# Initialize SparkSession
+sc <- sparkR.session(appName="SparkR-DataFrame-example")
# Create a simple local data.frame
localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
@@ -44,14 +43,14 @@ printSchema(peopleDF)
# Register this DataFrame as a table.
createOrReplaceTempView(peopleDF, "people")
-# SQL statements can be run by using the sql methods provided by sqlContext
+# SQL statements can be run by using the sql methods
teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
# Call collect to get a local data.frame
teenagersLocalDF <- collect(teenagers)
-# Print the teenagers in our dataset
+# Print the teenagers in our dataset
print(teenagersLocalDF)
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
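With a SparkSession active, sql() no longer takes an explicit sqlContext handle; queries run against the current session. A self-contained sketch of the temp-view-plus-SQL pattern from this file, using only calls that appear in the diff ("sql-sketch" is an illustrative app name):

    library(SparkR)
    sparkR.session(appName = "sql-sketch")

    # Register a SparkDataFrame as a temporary view, then query it with SQL.
    peopleDF <- createDataFrame(data.frame(name = c("John", "Smith", "Sarah"),
                                           age = c(19, 23, 18)))
    createOrReplaceTempView(peopleDF, "people")
    teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
    print(collect(teenagers))  # collect() brings results back as a local data.frame

    sparkR.session.stop()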
diff --git a/examples/src/main/r/ml.R b/examples/src/main/r/ml.R
index 495f392c26..940c98dcb9 100644
--- a/examples/src/main/r/ml.R
+++ b/examples/src/main/r/ml.R
@@ -21,14 +21,13 @@
# Load SparkR library into your R session
library(SparkR)
-# Initialize SparkContext and SQLContext
-sc <- sparkR.init(appName="SparkR-ML-example")
-sqlContext <- sparkRSQL.init(sc)
+# Initialize SparkSession
+sparkR.session(appName="SparkR-ML-example")
# $example on$
############################ spark.glm and glm ##############################################
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
# Fit a generalized linear model of family "gaussian" with spark.glm
gaussianDF <- irisDF
gaussianTestDF <- irisDF
@@ -62,7 +61,7 @@ showDF(binomialPredictions)
library(survival)
# Fit an accelerated failure time (AFT) survival regression model with spark.survreg
-ovarianDF <- suppressWarnings(createDataFrame(sqlContext, ovarian))
+ovarianDF <- suppressWarnings(createDataFrame(ovarian))
aftDF <- ovarianDF
aftTestDF <- ovarianDF
aftModel <- spark.survreg(aftDF, Surv(futime, fustat) ~ ecog_ps + rx)
@@ -78,7 +77,7 @@ showDF(aftPredictions)
# Fit a Bernoulli naive Bayes model with spark.naiveBayes
titanic <- as.data.frame(Titanic)
-titanicDF <- suppressWarnings(createDataFrame(sqlContext, titanic[titanic$Freq > 0, -5]))
+titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5])
nbDF <- titanicDF
nbTestDF <- titanicDF
nbModel <- spark.naiveBayes(nbDF, Survived ~ Class + Sex + Age)
@@ -93,7 +92,7 @@ showDF(nbPredictions)
############################ spark.kmeans ##############################################
# Fit a k-means model with spark.kmeans
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
kmeansDF <- irisDF
kmeansTestDF <- irisDF
kmeansModel <- spark.kmeans(kmeansDF, ~ Sepal_Length + Sepal_Width + Petal_Length + Petal_Width,
@@ -111,7 +110,7 @@ showDF(kmeansPredictions)
############################ model read/write ##############################################
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
# Fit a generalized linear model of family "gaussian" with spark.glm
gaussianDF <- irisDF
gaussianTestDF <- irisDF
@@ -139,11 +138,11 @@ train <- function(family) {
model <- glm(Sepal.Length ~ Sepal.Width + Species, iris, family = family)
summary(model)
}
-model.summaries <- spark.lapply(sc, families, train)
+model.summaries <- spark.lapply(families, train)
# Print the summary of each model
print(model.summaries)
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
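The same pattern extends to spark.lapply: with a session active, it no longer takes the sc handle as its first argument. A minimal sketch of the updated call, mirroring the train() example above ("lapply-sketch" is an illustrative app name):

    library(SparkR)
    sparkR.session(appName = "lapply-sketch")

    # Fit one local R glm per family, distributed across the cluster.
    families <- c("gaussian", "poisson")
    train <- function(family) {
      model <- glm(Sepal.Length ~ Sepal.Width + Species, iris, family = family)
      summary(model)
    }
    model.summaries <- spark.lapply(families, train)  # no sc argument
    print(model.summaries)

    sparkR.session.stop()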