 docs/sql-programming-guide.md                                                               |  6 ++++++
 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala  |  9 ---------
 sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala                       |  2 +-
 sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala                      | 15 ---------------
 4 files changed, 7 insertions(+), 25 deletions(-)
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 99fec6c778..e8eb88488e 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -2057,6 +2057,12 @@ options.
- The canonical names of SQL/DataFrame functions are now lower case (e.g. sum vs SUM).
- It has been determined that using the DirectOutputCommitter when speculation is enabled is unsafe
and thus this output committer will not be used when speculation is on, independent of configuration.
+ - The JSON data source will not automatically load new files that are created by other applications
+   (i.e. files that are not inserted into the dataset through Spark SQL).
+   For a persistent JSON table (i.e. a table whose metadata is stored in the Hive metastore),
+   users can use the `REFRESH TABLE` SQL command or `HiveContext`'s `refreshTable` method
+   to include those new files in the table. For a DataFrame representing a JSON dataset, users need to
+   recreate the DataFrame; the new DataFrame will include the new files.
## Upgrading from Spark SQL 1.3 to 1.4
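
A minimal sketch of the workflow the documentation note above describes, written against the
Spark 1.5-era Scala API. The table name json_table and the path "/path/to/json" are hypothetical
placeholders, not part of this change:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    val sc = new SparkContext(new SparkConf().setAppName("refresh-json-example"))
    val hiveContext = new HiveContext(sc)

    // Persistent JSON table (metadata stored in the Hive metastore): refresh it so
    // files written by other applications are picked up by subsequent queries.
    hiveContext.sql("REFRESH TABLE json_table")
    // ...or equivalently via HiveContext's refreshTable method.
    hiveContext.refreshTable("json_table")

    // DataFrame read directly from a JSON path: re-create it; the new DataFrame
    // will include the newly added files.
    val refreshed = hiveContext.read.json("/path/to/json")
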
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala
index 114c8b2118..ab8ca5f748 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala
@@ -111,15 +111,6 @@ private[sql] class JSONRelation(
jsonSchema
}
- override private[sql] def buildScan(
- requiredColumns: Array[String],
- filters: Array[Filter],
- inputPaths: Array[String],
- broadcastedConf: Broadcast[SerializableConfiguration]): RDD[Row] = {
- refresh()
- super.buildScan(requiredColumns, filters, inputPaths, broadcastedConf)
- }
-
override def buildScan(
requiredColumns: Array[String],
filters: Array[Filter],
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index b3b326fe61..dff726b33f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -562,7 +562,7 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio
})
}
- private[sql] def buildScan(
+ final private[sql] def buildScan(
requiredColumns: Array[String],
filters: Array[Filter],
inputPaths: Array[String],
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 78bd3e5582..084d83f6e9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -167,21 +167,6 @@ class InsertSuite extends DataSourceTest with SharedSQLContext {
)
}
- test("save directly to the path of a JSON table") {
- caseInsensitiveContext.table("jt").selectExpr("a * 5 as a", "b")
- .write.mode(SaveMode.Overwrite).json(path.toString)
- checkAnswer(
- sql("SELECT a, b FROM jsonTable"),
- (1 to 10).map(i => Row(i * 5, s"str$i"))
- )
-
- caseInsensitiveContext.table("jt").write.mode(SaveMode.Overwrite).json(path.toString)
- checkAnswer(
- sql("SELECT a, b FROM jsonTable"),
- (1 to 10).map(i => Row(i, s"str$i"))
- )
- }
-
test("it is not allowed to write to a table while querying it.") {
val message = intercept[AnalysisException] {
sql(