Diffstat (limited to 'sql/hive/src/test')
 sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 4 deletions(-)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
index 9838b9a4eb..65c02d473b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
@@ -60,36 +60,52 @@ class PartitionedTablePerfStatsSuite
setupPartitionedHiveTable(tableName, dir, 5)
}
- private def setupPartitionedHiveTable(tableName: String, dir: File, scale: Int): Unit = {
+ private def setupPartitionedHiveTable(
+ tableName: String, dir: File, scale: Int,
+ clearMetricsBeforeCreate: Boolean = false, repair: Boolean = true): Unit = {
spark.range(scale).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write
.partitionBy("partCol1", "partCol2")
.mode("overwrite")
.parquet(dir.getAbsolutePath)
+ if (clearMetricsBeforeCreate) {
+ HiveCatalogMetrics.reset()
+ }
+
spark.sql(s"""
|create external table $tableName (fieldOne long)
|partitioned by (partCol1 int, partCol2 int)
|stored as parquet
|location "${dir.getAbsolutePath}"""".stripMargin)
- spark.sql(s"msck repair table $tableName")
+ if (repair) {
+ spark.sql(s"msck repair table $tableName")
+ }
}
private def setupPartitionedDatasourceTable(tableName: String, dir: File): Unit = {
setupPartitionedDatasourceTable(tableName, dir, 5)
}
- private def setupPartitionedDatasourceTable(tableName: String, dir: File, scale: Int): Unit = {
+ private def setupPartitionedDatasourceTable(
+ tableName: String, dir: File, scale: Int,
+ clearMetricsBeforeCreate: Boolean = false, repair: Boolean = true): Unit = {
spark.range(scale).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write
.partitionBy("partCol1", "partCol2")
.mode("overwrite")
.parquet(dir.getAbsolutePath)
+ if (clearMetricsBeforeCreate) {
+ HiveCatalogMetrics.reset()
+ }
+
spark.sql(s"""
|create table $tableName (fieldOne long, partCol1 int, partCol2 int)
|using parquet
|options (path "${dir.getAbsolutePath}")
|partitioned by (partCol1, partCol2)""".stripMargin)
- spark.sql(s"msck repair table $tableName")
+ if (repair) {
+ spark.sql(s"msck repair table $tableName")
+ }
}
genericTest("partitioned pruned table reports only selected files") { spec =>
@@ -250,6 +266,33 @@ class PartitionedTablePerfStatsSuite
}
}
+ test("datasource table: table setup does not scan filesystem") {
+ withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") {
+ withTable("test") {
+ withTempDir { dir =>
+ setupPartitionedDatasourceTable(
+ "test", dir, scale = 10, clearMetricsBeforeCreate = true, repair = false)
+ assert(HiveCatalogMetrics.METRIC_FILES_DISCOVERED.getCount() == 0)
+ assert(HiveCatalogMetrics.METRIC_FILE_CACHE_HITS.getCount() == 0)
+ }
+ }
+ }
+ }
+
+ test("hive table: table setup does not scan filesystem") {
+ withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") {
+ withTable("test") {
+ withTempDir { dir =>
+ HiveCatalogMetrics.reset()
+ setupPartitionedHiveTable(
+ "test", dir, scale = 10, clearMetricsBeforeCreate = true, repair = false)
+ assert(HiveCatalogMetrics.METRIC_FILES_DISCOVERED.getCount() == 0)
+ assert(HiveCatalogMetrics.METRIC_FILE_CACHE_HITS.getCount() == 0)
+ }
+ }
+ }
+ }
+
test("hive table: num hive client calls does not scale with partition count") {
withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") {
withTable("test") {