From 359c2e827d5682249c009e83379a5ee8e5aa4e89 Mon Sep 17 00:00:00 2001
From: Felix Cheung
Date: Mon, 20 Jun 2016 13:46:24 -0700
Subject: [SPARK-15159][SPARKR] SparkSession roxygen2 doc, programming guide, example updates

## What changes were proposed in this pull request?

roxygen2 doc, programming guide, example updates

## How was this patch tested?

manual checks
shivaram

Author: Felix Cheung

Closes #13751 from felixcheung/rsparksessiondoc.
---
 examples/src/main/r/data-manipulation.R | 15 ++++++---------
 examples/src/main/r/dataframe.R         | 13 ++++++-------
 examples/src/main/r/ml.R                | 21 ++++++++++-----------
 3 files changed, 22 insertions(+), 27 deletions(-)

(limited to 'examples')

diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R
index badb98bc78..371335a62e 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -17,7 +17,7 @@
 
 # For this example, we shall use the "flights" dataset
 # The dataset consists of every flight departing Houston in 2011.
-# The data set is made up of 227,496 rows x 14 columns. 
+# The data set is made up of 227,496 rows x 14 columns.
 # To run this example use
 # ./bin/spark-submit examples/src/main/r/data-manipulation.R
 
@@ -33,11 +33,8 @@ if (length(args) != 1) {
   q("no")
 }
 
-## Initialize SparkContext
-sc <- sparkR.init(appName = "SparkR-data-manipulation-example")
-
-## Initialize SQLContext
-sqlContext <- sparkRSQL.init(sc)
+## Initialize SparkSession
+sparkR.session(appName = "SparkR-data-manipulation-example")
 
 flightsCsvPath <- args[[1]]
 
@@ -46,7 +43,7 @@ flights_df <- read.csv(flightsCsvPath, header = TRUE)
 flights_df$date <- as.Date(flights_df$date)
 
 ## Filter flights whose destination is San Francisco and write to a local data frame
-SFO_df <- flights_df[flights_df$dest == "SFO", ] 
+SFO_df <- flights_df[flights_df$dest == "SFO", ]
 
 # Convert the local data frame into a SparkDataFrame
 SFO_DF <- createDataFrame(SFO_df)
@@ -102,5 +99,5 @@ if("magrittr" %in% rownames(installed.packages())) {
   head(dailyDelayDF)
 }
 
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 0434705717..a377d6e864 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -17,9 +17,8 @@
 
 library(SparkR)
 
-# Initialize SparkContext and SQLContext
-sc <- sparkR.init(appName="SparkR-DataFrame-example")
-sqlContext <- sparkRSQL.init(sc)
+# Initialize SparkSession
+sc <- sparkR.session(appName="SparkR-DataFrame-example")
 
 # Create a simple local data.frame
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
@@ -44,14 +43,14 @@ printSchema(peopleDF)
 # Register this DataFrame as a table.
 createOrReplaceTempView(peopleDF, "people")
 
-# SQL statements can be run by using the sql methods provided by sqlContext
+# SQL statements can be run by using the sql methods
 teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
 
 # Call collect to get a local data.frame
 teenagersLocalDF <- collect(teenagers)
 
-# Print the teenagers in our dataset 
+# Print the teenagers in our dataset
 print(teenagersLocalDF)
 
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
diff --git a/examples/src/main/r/ml.R b/examples/src/main/r/ml.R
index 495f392c26..940c98dcb9 100644
--- a/examples/src/main/r/ml.R
+++ b/examples/src/main/r/ml.R
@@ -21,14 +21,13 @@
 # Load SparkR library into your R session
 library(SparkR)
 
-# Initialize SparkContext and SQLContext
-sc <- sparkR.init(appName="SparkR-ML-example")
-sqlContext <- sparkRSQL.init(sc)
+# Initialize SparkSession
+sparkR.session(appName="SparkR-ML-example")
 
 # $example on$
 ############################ spark.glm and glm ##############################################
 
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
 # Fit a generalized linear model of family "gaussian" with spark.glm
 gaussianDF <- irisDF
 gaussianTestDF <- irisDF
@@ -62,7 +61,7 @@ showDF(binomialPredictions)
 library(survival)
 
 # Fit an accelerated failure time (AFT) survival regression model with spark.survreg
-ovarianDF <- suppressWarnings(createDataFrame(sqlContext, ovarian))
+ovarianDF <- suppressWarnings(createDataFrame(ovarian))
 aftDF <- ovarianDF
 aftTestDF <- ovarianDF
 aftModel <- spark.survreg(aftDF, Surv(futime, fustat) ~ ecog_ps + rx)
@@ -78,7 +77,7 @@ showDF(aftPredictions)
 
 # Fit a Bernoulli naive Bayes model with spark.naiveBayes
 titanic <- as.data.frame(Titanic)
-titanicDF <- suppressWarnings(createDataFrame(sqlContext, titanic[titanic$Freq > 0, -5]))
+titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5])
 nbDF <- titanicDF
 nbTestDF <- titanicDF
 nbModel <- spark.naiveBayes(nbDF, Survived ~ Class + Sex + Age)
@@ -93,7 +92,7 @@ showDF(nbPredictions)
 
 ############################ spark.kmeans ##############################################
 # Fit a k-means model with spark.kmeans
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
 kmeansDF <- irisDF
 kmeansTestDF <- irisDF
 kmeansModel <- spark.kmeans(kmeansDF, ~ Sepal_Length + Sepal_Width + Petal_Length + Petal_Width,
@@ -111,7 +110,7 @@ showDF(kmeansPredictions)
 
 ############################ model read/write ##############################################
 
-irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+irisDF <- suppressWarnings(createDataFrame(iris))
 # Fit a generalized linear model of family "gaussian" with spark.glm
 gaussianDF <- irisDF
 gaussianTestDF <- irisDF
@@ -139,11 +138,11 @@ train <- function(family) {
   model <- glm(Sepal.Length ~ Sepal.Width + Species, iris, family = family)
   summary(model)
 }
 
-model.summaries <- spark.lapply(sc, families, train)
+model.summaries <- spark.lapply(families, train)
 
 # Print the summary of each model
 print(model.summaries)
 
-# Stop the SparkContext now
-sparkR.stop()
+# Stop the SparkSession now
+sparkR.session.stop()
-- 
cgit v1.2.3
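
For context, a minimal sketch of the SparkR 2.0 session lifecycle these examples migrate to; the appName value and the local data.frame are illustrative (taken from dataframe.R above), and the pre-2.0 calls appear only as comments:

library(SparkR)

# Old entry points removed by this patch:
#   sc <- sparkR.init(appName = "SparkR-example")
#   sqlContext <- sparkRSQL.init(sc)

# New single entry point: start (or reuse) a SparkSession
sparkR.session(appName = "SparkR-example")

# DataFrame and SQL functions no longer take an sqlContext argument
localDF <- data.frame(name = c("John", "Smith", "Sarah"), age = c(19, 23, 18))
peopleDF <- createDataFrame(localDF)
head(peopleDF)

# Stop the SparkSession now
sparkR.session.stop()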