author     Eric Liang <ekl@databricks.com>       2016-10-19 10:20:12 +0800
committer  Wenchen Fan <wenchen@databricks.com>  2016-10-19 10:20:12 +0800
commit     5f20ae0394388574a3767daf7f499c89658f61e1 (patch)
tree       7939521366699b07f05d79d47a31fe3b87beb0c8 /sql/hive
parent     941b3f9aca59e62c078508a934f8c2221ced96ce (diff)
[SPARK-17980][SQL] Fix refreshByPath for converted Hive tables
## What changes were proposed in this pull request?

There was a bug introduced in https://github.com/apache/spark/pull/14690 which broke refreshByPath with converted Hive tables (though it turns out it was very difficult to refresh converted Hive tables anyway, since you had to specify the exact path of one of the partitions). This changes refreshByPath to invalidate by prefix instead of exact match, and fixes the issue.

cc sameeragarwal for refreshByPath changes, mallman

## How was this patch tested?

Extended unit test.

Author: Eric Liang <ekl@databricks.com>

Closes #15521 from ericl/fix-caching.
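To illustrate the idea behind the fix (a minimal, hypothetical sketch, not Spark's actual CatalogFileIndex or FileStatusCache code): with a path-keyed metadata cache, exact-match invalidation only works if the caller passes the precise path of one cached entry, whereas prefix invalidation lets a refresh on the table's root directory drop every cached partition directory underneath it.

```scala
import scala.collection.mutable

// Minimal sketch (hypothetical names, not part of this patch): a path-keyed cache
// where "refresh by path" drops every entry whose key starts with the given prefix,
// instead of requiring an exact key match on one partition's path.
object PrefixInvalidationSketch {
  private val cache = mutable.Map[String, Long]() // path -> cached file count

  def put(path: String, fileCount: Long): Unit = cache(path) = fileCount

  // Exact-match invalidation: misses partition subdirectories under the table root.
  def invalidateExact(path: String): Unit = cache.remove(path)

  // Prefix invalidation: removes the root path and everything beneath it.
  def invalidateByPrefix(prefix: String): Unit =
    cache.keys.toList.filter(_.startsWith(prefix)).foreach(cache.remove)

  def main(args: Array[String]): Unit = {
    put("/warehouse/test/f1=1", 2)
    put("/warehouse/test/f1=2", 3)
    invalidateExact("/warehouse/test")    // no exact key matches, so nothing is removed
    assert(cache.size == 2)
    invalidateByPrefix("/warehouse/test") // both partition entries are dropped
    assert(cache.isEmpty)
  }
}
```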
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala    2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala  21
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 16e1e37b2f..c909eb5d20 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -235,7 +235,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
           if (lazyPruningEnabled) {
             catalog
           } else {
-            catalog.cachedAllPartitions
+            catalog.allPartitions
           }
         }
         val partitionSchemaColumnNames = partitionSchema.map(_.name.toLowerCase).toSet
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala
index 7af81a3a90..2ca1cd4c07 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala
@@ -80,9 +80,13 @@ class HiveMetadataCacheSuite extends QueryTest with SQLTestUtils with TestHiveSi
             val df = spark.sql("select * from test")
             assert(sql("select * from test").count() == 5)

+            def deleteRandomFile(): Unit = {
+              val p = new Path(spark.table("test").inputFiles.head)
+              assert(p.getFileSystem(hiveContext.sessionState.newHadoopConf()).delete(p, true))
+            }
+
             // Delete a file, then assert that we tried to read it. This means the table was cached.
-            val p = new Path(spark.table("test").inputFiles.head)
-            assert(p.getFileSystem(hiveContext.sessionState.newHadoopConf()).delete(p, true))
+            deleteRandomFile()
             val e = intercept[SparkException] {
               sql("select * from test").count()
             }
@@ -91,6 +95,19 @@ class HiveMetadataCacheSuite extends QueryTest with SQLTestUtils with TestHiveSi
             // Test refreshing the cache.
             spark.catalog.refreshTable("test")
             assert(sql("select * from test").count() == 4)
+            assert(spark.table("test").inputFiles.length == 4)
+
+            // Test refresh by path separately since it goes through different code paths than
+            // refreshTable does.
+            deleteRandomFile()
+            spark.catalog.cacheTable("test")
+            spark.catalog.refreshByPath("/some-invalid-path") // no-op
+            val e2 = intercept[SparkException] {
+              sql("select * from test").count()
+            }
+            assert(e2.getMessage.contains("FileNotFoundException"))
+            spark.catalog.refreshByPath(dir.getAbsolutePath)
+            assert(sql("select * from test").count() == 3)
           }
         }
       }