diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-06-16 12:46:25 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2016-06-16 12:46:25 -0700 |
commit | a865f6e05297f6121bb2fde717860f9edeed263e (patch) | |
tree | 874653a7d9736e654e26b996de6006d973038bb7 /examples/src/main | |
parent | 9ea0d5e326e08b914aa46f1eec8795688a61bf74 (diff) | |
download | spark-a865f6e05297f6121bb2fde717860f9edeed263e.tar.gz spark-a865f6e05297f6121bb2fde717860f9edeed263e.tar.bz2 spark-a865f6e05297f6121bb2fde717860f9edeed263e.zip |
[SPARK-15996][R] Fix R examples by removing deprecated functions
## What changes were proposed in this pull request?
Currently, R examples(`dataframe.R` and `data-manipulation.R`) fail like the following. We had better update them before releasing 2.0 RC. This PR updates them to use up-to-date APIs.
```bash
$ bin/spark-submit examples/src/main/r/dataframe.R
...
Warning message:
'createDataFrame(sqlContext...)' is deprecated.
Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead.
See help("Deprecated")
...
Warning message:
'read.json(sqlContext...)' is deprecated.
Use 'read.json(path)' instead.
See help("Deprecated")
...
Error: could not find function "registerTempTable"
Execution halted
```
## How was this patch tested?
Manual.
```
curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv
bin/spark-submit examples/src/main/r/dataframe.R
bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv
```
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #13714 from dongjoon-hyun/SPARK-15996.
Diffstat (limited to 'examples/src/main')
-rw-r--r-- | examples/src/main/r/data-manipulation.R | 8 | ||||
-rw-r--r-- | examples/src/main/r/dataframe.R | 11 |
2 files changed, 11 insertions, 8 deletions
diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R index 58a30135aa..badb98bc78 100644 --- a/examples/src/main/r/data-manipulation.R +++ b/examples/src/main/r/data-manipulation.R @@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date) SFO_df <- flights_df[flights_df$dest == "SFO", ] # Convert the local data frame into a SparkDataFrame -SFO_DF <- createDataFrame(sqlContext, SFO_df) +SFO_DF <- createDataFrame(SFO_df) # Directly create a SparkDataFrame from the source data -flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = "true") +flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true") # Print the schema of this SparkDataFrame printSchema(flightsDF) @@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled") # Using SQL to select columns of data # First, register the flights SparkDataFrame as a table -registerTempTable(flightsDF, "flightsTable") -destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable") +createOrReplaceTempView(flightsDF, "flightsTable") +destDF <- sql("SELECT dest, cancelled FROM flightsTable") # Use collect to create a local R data frame local_df <- collect(destDF) diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R index 436bac6aaf..0434705717 100644 --- a/examples/src/main/r/dataframe.R +++ b/examples/src/main/r/dataframe.R @@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc) localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18)) # Convert local data frame to a SparkDataFrame -df <- createDataFrame(sqlContext, localDF) +df <- createDataFrame(localDF) # Print its schema printSchema(df) @@ -35,14 +35,17 @@ printSchema(df) # Create a DataFrame from a JSON file path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json") -peopleDF <- read.json(sqlContext, path) +peopleDF <- read.json(path) printSchema(peopleDF) +# root +# |-- age: long (nullable = true) +# |-- name: string (nullable = true) # Register this DataFrame as a table. -registerTempTable(peopleDF, "people") +createOrReplaceTempView(peopleDF, "people") # SQL statements can be run by using the sql methods provided by sqlContext -teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19") +teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") # Call collect to get a local data.frame teenagersLocalDF <- collect(teenagers) |