-rw-r--r--  R/pkg/R/sparkR.R                          3
-rw-r--r--  R/pkg/inst/tests/testthat/test_client.R   6
-rw-r--r--  docs/sparkr.md                            4
-rw-r--r--  examples/src/main/r/data-manipulation.R   7
4 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index c187869fdf..04a8b1e1f3 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -103,8 +103,7 @@ sparkR.stop <- function() {
#' list(spark.executor.memory="4g"),
#' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
#' c("one.jar", "two.jar", "three.jar"),
-#' c("com.databricks:spark-avro_2.10:2.0.1",
-#' "com.databricks:spark-csv_2.10:1.3.0"))
+#' c("com.databricks:spark-avro_2.10:2.0.1"))
#'}
sparkR.init <- function(
diff --git a/R/pkg/inst/tests/testthat/test_client.R b/R/pkg/inst/tests/testthat/test_client.R
index 28276a020d..0cf25fe1db 100644
--- a/R/pkg/inst/tests/testthat/test_client.R
+++ b/R/pkg/inst/tests/testthat/test_client.R
@@ -37,9 +37,7 @@ test_that("multiple packages don't produce a warning", {
test_that("sparkJars sparkPackages as character vectors", {
args <- generateSparkSubmitArgs("", "", c("one.jar", "two.jar", "three.jar"), "",
- c("com.databricks:spark-avro_2.10:2.0.1",
- "com.databricks:spark-csv_2.10:1.3.0"))
+ c("com.databricks:spark-avro_2.10:2.0.1"))
expect_match(args, "--jars one.jar,two.jar,three.jar")
- expect_match(args,
- "--packages com.databricks:spark-avro_2.10:2.0.1,com.databricks:spark-csv_2.10:1.3.0")
+ expect_match(args, "--packages com.databricks:spark-avro_2.10:2.0.1")
})
diff --git a/docs/sparkr.md b/docs/sparkr.md
index 760534ae14..9b5eaa1ec7 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -115,13 +115,13 @@ head(df)
SparkR supports operating on a variety of data sources through the `DataFrame` interface. This section describes the general methods for loading and saving data using Data Sources. You can check the Spark SQL programming guide for more [specific options](sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources.
-The general method for creating DataFrames from data sources is `read.df`. This method takes in the `SQLContext`, the path for the file to load and the type of data source. SparkR supports reading JSON and Parquet files natively and through [Spark Packages](http://spark-packages.org/) you can find data source connectors for popular file formats like [CSV](http://spark-packages.org/package/databricks/spark-csv) and [Avro](http://spark-packages.org/package/databricks/spark-avro). These packages can either be added by
+The general method for creating DataFrames from data sources is `read.df`. This method takes in the `SQLContext`, the path for the file to load and the type of data source. SparkR supports reading JSON, CSV and Parquet files natively and through [Spark Packages](http://spark-packages.org/) you can find data source connectors for popular file formats like [Avro](http://spark-packages.org/package/databricks/spark-avro). These packages can either be added by
specifying `--packages` with `spark-submit` or `sparkR` commands, or if creating context through `init`
you can specify the packages with the `packages` argument.
<div data-lang="r" markdown="1">
{% highlight r %}
-sc <- sparkR.init(sparkPackages="com.databricks:spark-csv_2.11:1.0.3")
+sc <- sparkR.init(sparkPackages="com.databricks:spark-avro_2.11:2.0.1")
sqlContext <- sparkRSQL.init(sc)
{% endhighlight %}
</div>
diff --git a/examples/src/main/r/data-manipulation.R b/examples/src/main/r/data-manipulation.R
index 594bf49d60..58a30135aa 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -20,8 +20,7 @@
# The data set is made up of 227,496 rows x 14 columns.
# To run this example use
-# ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3
-# examples/src/main/r/data-manipulation.R <path_to_csv>
+# ./bin/spark-submit examples/src/main/r/data-manipulation.R <path_to_csv>
# Load SparkR library into your R session
library(SparkR)
@@ -29,7 +28,7 @@ library(SparkR)
args <- commandArgs(trailing = TRUE)
if (length(args) != 1) {
- print("Usage: data-manipulation.R <path-to-flights.csv")
+ print("Usage: data-manipulation.R <path-to-flights.csv>")
print("The data can be downloaded from: http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv")
q("no")
}
@@ -53,7 +52,7 @@ SFO_df <- flights_df[flights_df$dest == "SFO", ]
SFO_DF <- createDataFrame(sqlContext, SFO_df)
# Directly create a SparkDataFrame from the source data
-flightsDF <- read.df(sqlContext, flightsCsvPath, source = "com.databricks.spark.csv", header = "true")
+flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = "true")
# Print the schema of this SparkDataFrame
printSchema(flightsDF)
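
A minimal sketch (not part of the patch) of what this change means for users: CSV files can be read with SparkR's built-in "csv" source, so the spark-csv package is no longer needed, while Avro still requires a Spark Package. The file path "flights.csv" and the spark-avro version below are illustrative assumptions; the API matches the SparkR 1.x style used in this repository (sparkR.init / sparkRSQL.init / read.df).

# Load SparkR into the R session
library(SparkR)

# spark-avro is only needed if Avro data will be read; CSV no longer needs spark-csv.
sc <- sparkR.init(sparkPackages = "com.databricks:spark-avro_2.10:2.0.1")
sqlContext <- sparkRSQL.init(sc)

# Read a CSV file with the native source (replaces source = "com.databricks.spark.csv").
flightsDF <- read.df(sqlContext, "flights.csv", source = "csv", header = "true")
printSchema(flightsDF)

sparkR.stop()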