From a865f6e05297f6121bb2fde717860f9edeed263e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 16 Jun 2016 12:46:25 -0700 Subject: [SPARK-15996][R] Fix R examples by removing deprecated functions ## What changes were proposed in this pull request? Currently, R examples(`dataframe.R` and `data-manipulation.R`) fail like the following. We had better update them before releasing 2.0 RC. This PR updates them to use up-to-date APIs. ```bash $ bin/spark-submit examples/src/main/r/dataframe.R ... Warning message: 'createDataFrame(sqlContext...)' is deprecated. Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead. See help("Deprecated") ... Warning message: 'read.json(sqlContext...)' is deprecated. Use 'read.json(path)' instead. See help("Deprecated") ... Error: could not find function "registerTempTable" Execution halted ``` ## How was this patch tested? Manual. ``` curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv bin/spark-submit examples/src/main/r/dataframe.R bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv ``` Author: Dongjoon Hyun Closes #13714 from dongjoon-hyun/SPARK-15996. --- examples/src/main/r/data-manipulation.R | 8 ++++---- examples/src/main/r/dataframe.R | 11 +++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'examples') diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R index 58a30135aa..badb98bc78 100644 --- a/examples/src/main/r/data-manipulation.R +++ b/examples/src/main/r/data-manipulation.R @@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date) SFO_df <- flights_df[flights_df$dest == "SFO", ] # Convert the local data frame into a SparkDataFrame -SFO_DF <- createDataFrame(sqlContext, SFO_df) +SFO_DF <- createDataFrame(SFO_df) # Directly create a SparkDataFrame from the source data -flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = "true") +flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true") # Print the schema of this SparkDataFrame printSchema(flightsDF) @@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled") # Using SQL to select columns of data # First, register the flights SparkDataFrame as a table -registerTempTable(flightsDF, "flightsTable") -destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable") +createOrReplaceTempView(flightsDF, "flightsTable") +destDF <- sql("SELECT dest, cancelled FROM flightsTable") # Use collect to create a local R data frame local_df <- collect(destDF) diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R index 436bac6aaf..0434705717 100644 --- a/examples/src/main/r/dataframe.R +++ b/examples/src/main/r/dataframe.R @@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc) localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18)) # Convert local data frame to a SparkDataFrame -df <- createDataFrame(sqlContext, localDF) +df <- createDataFrame(localDF) # Print its schema printSchema(df) @@ -35,14 +35,17 @@ printSchema(df) # Create a DataFrame from a JSON file path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json") -peopleDF <- read.json(sqlContext, path) +peopleDF <- read.json(path) printSchema(peopleDF) +# root +# |-- age: long (nullable = true) +# |-- name: string (nullable = true) # Register this DataFrame as a table. -registerTempTable(peopleDF, "people") +createOrReplaceTempView(peopleDF, "people") # SQL statements can be run by using the sql methods provided by sqlContext -teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19") +teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") # Call collect to get a local data.frame teenagersLocalDF <- collect(teenagers) -- cgit v1.2.3