author     gatorsmile <gatorsmile@gmail.com>    2016-06-28 15:32:45 -0700
committer  Reynold Xin <rxin@databricks.com>    2016-06-28 15:32:45 -0700
commit     25520e976275e0d1e3bf9c73128ef4dec4618568 (patch)
tree       319efec3e93ab2cc27edfe426d051ca6750879c1
parent     35438fb0ad3bcda5c5a3a0ccde1a620699d012db (diff)
[SPARK-16236][SQL] Add Path Option back to Load API in DataFrameReader
#### What changes were proposed in this pull request?

koertkuipers identified that PR https://github.com/apache/spark/pull/13727/ changed the behavior of the `load` API: after that change, the `load` API no longer adds the value of `path` into the `options`. Thank you!

This PR adds the `path` option back to the `load()` API in `DataFrameReader`, if and only if the user specifies exactly one `path` in the `load` call. For example, the `path` option is visible after the following API call:

```Scala
spark.read
  .format("parquet")
  .load("/test")
```

#### How was this patch tested?

Added test cases.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #13933 from gatorsmile/optionPath.
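For illustration only (not part of this patch), the sketch below shows a minimal custom `RelationProvider`, under a hypothetical package name `com.example.pathsource`, that prints the `path` option it receives; the test source `org.apache.spark.sql.test` used in the suite below records the same parameters via `LastOptions`.

```Scala
// Hypothetical example source, not part of this patch.
package com.example.pathsource

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

class DefaultSource extends RelationProvider {
  override def createRelation(
      ctx: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    // With this change, spark.read.format(...).load("/some/dir") surfaces the
    // single path here as parameters("path"); load() with zero or several
    // paths leaves the "path" key absent.
    println(s"path option received by the source: ${parameters.get("path")}")

    // Return a dummy relation; the schema is arbitrary for this illustration.
    new BaseRelation {
      override def sqlContext: SQLContext = ctx
      override def schema: StructType =
        StructType(Seq(StructField("value", StringType)))
    }
  }
}
```

Calling `spark.read.format("com.example.pathsource").load("/some/dir")` would then print `Some(/some/dir)`, while `load()` with no path or with multiple paths would print `None`.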
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala                   2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala  28
2 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 35ba9c5079..35ba522786 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -129,7 +129,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
* @since 1.4.0
*/
def load(path: String): DataFrame = {
- load(Seq(path): _*) // force invocation of `load(...varargs...)`
+ option("path", path).load(Seq.empty: _*) // force invocation of `load(...varargs...)`
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 3fa3864bc9..ebbcc1d7ff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -170,6 +170,34 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
assert(LastOptions.saveMode === SaveMode.ErrorIfExists)
}
+ test("test path option in load") {
+ spark.read
+ .format("org.apache.spark.sql.test")
+ .option("intOpt", 56)
+ .load("/test")
+
+ assert(LastOptions.parameters("intOpt") == "56")
+ assert(LastOptions.parameters("path") == "/test")
+
+ LastOptions.clear()
+ spark.read
+ .format("org.apache.spark.sql.test")
+ .option("intOpt", 55)
+ .load()
+
+ assert(LastOptions.parameters("intOpt") == "55")
+ assert(!LastOptions.parameters.contains("path"))
+
+ LastOptions.clear()
+ spark.read
+ .format("org.apache.spark.sql.test")
+ .option("intOpt", 54)
+ .load("/test", "/test1", "/test2")
+
+ assert(LastOptions.parameters("intOpt") == "54")
+ assert(!LastOptions.parameters.contains("path"))
+ }
+
test("test different data types for options") {
val df = spark.read
.format("org.apache.spark.sql.test")