[SPARK-10287] [SQL] Fixes JSONRelation refreshing on read path

https://issues.apache.org/jira/browse/SPARK-10287 After porting json to HadoopFsRelation, it seems hard to keep the behavior of picking up new files automatically for JSON. This PR removes this behavior, so JSON is consistent with others (ORC and Parquet). Author: Yin Huai <yhuai@databricks.com> Closes #8469 from yhuai/jsonRefresh.
author: Yin Huai <yhuai@databricks.com> 2015-08-27 16:11:25 -0700
committer: Yin Huai <yhuai@databricks.com> 2015-08-27 16:11:25 -0700
commit: b3dd569ad40905f8861a547a1e25ed3ca8e1d272 (patch)
tree: 484ae842e1511bf7bd80ca238bac26c539a84df0
parent: 5bfe9e1111d9862084586549a7dc79476f67bab9 (diff)
download: spark-b3dd569ad40905f8861a547a1e25ed3ca8e1d272.tar.gz
spark-b3dd569ad40905f8861a547a1e25ed3ca8e1d272.tar.bz2
spark-b3dd569ad40905f8861a547a1e25ed3ca8e1d272.zip
4 files changed, 7 insertions, 25 deletions
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 99fec6c778..e8eb88488e 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -2057,6 +2057,12 @@ options.
  - The canonical name of SQL/DataFrame functions are now lower case (e.g. sum vs SUM).
  - It has been determined that using the DirectOutputCommitter when speculation is enabled is unsafe
    and thus this output committer will not be used when speculation is on, independent of configuration.
+ - JSON data source will not automatically load new files that are created by other applications
+   (i.e. files that are not inserted to the dataset through Spark SQL).
+   For a JSON persistent table (i.e. the metadata of the table is stored in Hive Metastore),
+   users can use `REFRESH TABLE` SQL command or `HiveContext`'s `refreshTable` method
+   to include those new files to the table. For a DataFrame representing a JSON dataset, users need to recreate
+   the DataFrame and the new DataFrame will include new files.
 
 ## Upgrading from Spark SQL 1.3 to 1.4
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala
index 114c8b2118..ab8ca5f748 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala
@@ -111,15 +111,6 @@ private[sql] class JSONRelation(
     jsonSchema
   }
 
-  override private[sql] def buildScan(
-      requiredColumns: Array[String],
-      filters: Array[Filter],
-      inputPaths: Array[String],
-      broadcastedConf: Broadcast[SerializableConfiguration]): RDD[Row] = {
-    refresh()
-    super.buildScan(requiredColumns, filters, inputPaths, broadcastedConf)
-  }
-
   override def buildScan(
       requiredColumns: Array[String],
       filters: Array[Filter],
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index b3b326fe61..dff726b33f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -562,7 +562,7 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio
     })
   }
 
-  private[sql] def buildScan(
+  final private[sql] def buildScan(
       requiredColumns: Array[String],
       filters: Array[Filter],
       inputPaths: Array[String],
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 78bd3e5582..084d83f6e9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -167,21 +167,6 @@ class InsertSuite extends DataSourceTest with SharedSQLContext {
     )
   }
 
-  test("save directly to the path of a JSON table") {
-    caseInsensitiveContext.table("jt").selectExpr("a * 5 as a", "b")
-      .write.mode(SaveMode.Overwrite).json(path.toString)
-    checkAnswer(
-      sql("SELECT a, b FROM jsonTable"),
-      (1 to 10).map(i => Row(i * 5, s"str$i"))
-    )
-
-    caseInsensitiveContext.table("jt").write.mode(SaveMode.Overwrite).json(path.toString)
-    checkAnswer(
-      sql("SELECT a, b FROM jsonTable"),
-      (1 to 10).map(i => Row(i, s"str$i"))
-    )
-  }
-
   test("it is not allowed to write to a table while querying it.") {
     val message = intercept[AnalysisException] {
       sql(
author	Yin Huai <yhuai@databricks.com>	2015-08-27 16:11:25 -0700
committer	Yin Huai <yhuai@databricks.com>	2015-08-27 16:11:25 -0700
commit	b3dd569ad40905f8861a547a1e25ed3ca8e1d272 (patch)
tree	484ae842e1511bf7bd80ca238bac26c539a84df0
parent	5bfe9e1111d9862084586549a7dc79476f67bab9 (diff)
download	spark-b3dd569ad40905f8861a547a1e25ed3ca8e1d272.tar.gz spark-b3dd569ad40905f8861a547a1e25ed3ca8e1d272.tar.bz2 spark-b3dd569ad40905f8861a547a1e25ed3ca8e1d272.zip