From 85235ed6c600270e3fa434738bd50dce3564440a Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Wed, 23 Nov 2016 20:14:08 +0800
Subject: [SPARK-18545][SQL] Verify number of hive client RPCs in
 PartitionedTablePerfStatsSuite

## What changes were proposed in this pull request?

This would help catch accidental O(n) calls to the hive client as in
https://issues.apache.org/jira/browse/SPARK-18507

## How was this patch tested?

Checked that the test fails before
https://issues.apache.org/jira/browse/SPARK-18507 was patched.

cc cloud-fan

Author: Eric Liang

Closes #15985 from ericl/spark-18545.
---
 .../sql/hive/PartitionedTablePerfStatsSuite.scala | 58 +++++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
index b41bc862e9..9838b9a4eb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala
@@ -57,7 +57,11 @@ class PartitionedTablePerfStatsSuite
   }
 
   private def setupPartitionedHiveTable(tableName: String, dir: File): Unit = {
-    spark.range(5).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write
+    setupPartitionedHiveTable(tableName, dir, 5)
+  }
+
+  private def setupPartitionedHiveTable(tableName: String, dir: File, scale: Int): Unit = {
+    spark.range(scale).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write
       .partitionBy("partCol1", "partCol2")
       .mode("overwrite")
       .parquet(dir.getAbsolutePath)
@@ -71,7 +75,11 @@ class PartitionedTablePerfStatsSuite
   }
 
   private def setupPartitionedDatasourceTable(tableName: String, dir: File): Unit = {
-    spark.range(5).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write
+    setupPartitionedDatasourceTable(tableName, dir, 5)
+  }
+
+  private def setupPartitionedDatasourceTable(tableName: String, dir: File, scale: Int): Unit = {
+    spark.range(scale).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write
       .partitionBy("partCol1", "partCol2")
       .mode("overwrite")
       .parquet(dir.getAbsolutePath)
@@ -242,6 +250,52 @@ class PartitionedTablePerfStatsSuite
     }
   }
 
+  test("hive table: num hive client calls does not scale with partition count") {
+    withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") {
+      withTable("test") {
+        withTempDir { dir =>
+          setupPartitionedHiveTable("test", dir, scale = 100)
+
+          HiveCatalogMetrics.reset()
+          assert(spark.sql("select * from test where partCol1 = 1").count() == 1)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() > 0)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10)
+
+          HiveCatalogMetrics.reset()
+          assert(spark.sql("select * from test").count() == 100)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10)
+
+          HiveCatalogMetrics.reset()
+          assert(spark.sql("show partitions test").count() == 100)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10)
+        }
+      }
+    }
+  }
+
+  test("datasource table: num hive client calls does not scale with partition count") {
+    withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") {
+      withTable("test") {
+        withTempDir { dir =>
+          setupPartitionedDatasourceTable("test", dir, scale = 100)
+
+          HiveCatalogMetrics.reset()
+          assert(spark.sql("select * from test where partCol1 = 1").count() == 1)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() > 0)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10)
+
+          HiveCatalogMetrics.reset()
+          assert(spark.sql("select * from test").count() == 100)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10)
+
+          HiveCatalogMetrics.reset()
+          assert(spark.sql("show partitions test").count() == 100)
+          assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10)
+        }
+      }
+    }
+  }
+
   test("hive table: files read and cached when filesource partition management is off") {
     withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") {
       withTable("test") {
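
Each new test above follows the same reset-run-bound pattern against the shared `HiveCatalogMetrics` counters. A minimal sketch of that pattern, assuming a `spark-shell` session against a table like the one the tests create, and that `HiveCatalogMetrics` is importable from `org.apache.spark.metrics.source` as in Spark 2.1; the helper `assertHiveClientCallsBounded` is illustrative and not part of the patch — only `reset()` and `METRIC_HIVE_CLIENT_CALLS.getCount()` come from the diff:

```scala
import org.apache.spark.metrics.source.HiveCatalogMetrics

// Illustrative helper (not in the patch): run a workload and assert that the
// number of Hive client RPCs it issued stays within [min, max).
def assertHiveClientCallsBounded(min: Long, max: Long)(workload: => Unit): Unit = {
  HiveCatalogMetrics.reset()  // zero the static counters before measuring
  workload
  val calls = HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount()
  assert(calls >= min && calls < max, s"hive client call count out of bounds: $calls")
}

// Mirrors the tests above: a partition-pruned scan of a 100-partition table
// should need a small, constant number of metastore RPCs, not O(partitions).
assertHiveClientCallsBounded(min = 1, max = 10) {
  spark.sql("select * from test where partCol1 = 1").count()
}
```

The `< 10` bound in the tests is deliberately loose: the point is not the exact RPC count but that it stays constant as the partition count grows from 5 to 100, which is what catches an accidental O(n) regression like SPARK-18507.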