From ee3b1715620d48b8d22d086ddeef49ad7ff249d2 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Mon, 9 May 2016 09:58:36 -0700 Subject: [MINOR] [SPARKR] Update data-manipulation.R to use native csv reader ## What changes were proposed in this pull request? * Since Spark now supports a native csv reader, it is no longer necessary to use the third-party ```spark-csv``` in ```examples/src/main/r/data-manipulation.R```. Meanwhile, remove all ```spark-csv``` usage in SparkR. * Running R applications through ```sparkR``` is not supported as of Spark 2.0, so we now use ```./bin/spark-submit``` to run the example. ## How was this patch tested? Offline test. Author: Yanbo Liang Closes #13005 from yanboliang/r-df-examples. --- examples/src/main/r/data-manipulation.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'examples/src') diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R index 594bf49d60..58a30135aa 100644 --- a/examples/src/main/r/data-manipulation.R +++ b/examples/src/main/r/data-manipulation.R @@ -20,8 +20,7 @@ # The data set is made up of 227,496 rows x 14 columns. 
# To run this example use -# ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3 -# examples/src/main/r/data-manipulation.R +# ./bin/spark-submit examples/src/main/r/data-manipulation.R # Load SparkR library into your R session library(SparkR) @@ -29,7 +28,7 @@ library(SparkR) args <- commandArgs(trailing = TRUE) if (length(args) != 1) { - print("Usage: data-manipulation.R <path-to-flights.csv") + print("Usage: data-manipulation.R <path-to-flights.csv>") print("The data can be downloaded from: http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv") q("no") } @@ -53,7 +52,7 @@ SFO_df <- flights_df[flights_df$dest == "SFO", ] SFO_DF <- createDataFrame(sqlContext, SFO_df) # Directly create a SparkDataFrame from the source data -flightsDF <- read.df(sqlContext, flightsCsvPath, source = "com.databricks.spark.csv", header = "true") +flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = "true") # Print the schema of this SparkDataFrame printSchema(flightsDF) -- cgit v1.2.3