author     Wenchen Fan <wenchen@databricks.com>    2017-01-19 00:07:48 -0800
committer  gatorsmile <gatorsmile@gmail.com>       2017-01-19 00:07:48 -0800
commit     2e62560024999c215cf2373fc9a8070bb2ad5c58 (patch)
tree       75fadf15026f631e0e8195beda1d1f7c03258c1b /sql/hive/src/test/scala/org
parent     0c9231858866eff16f97df073d22811176fb6b36 (diff)
[SPARK-19265][SQL] Make the table relation cache general so it does not depend on Hive
## What changes were proposed in this pull request?

We have a table relation plan cache in `HiveMetastoreCatalog`, which caches a lot of things: file statuses, resolved data sources, inferred schemas, etc. However, it doesn't make sense to tie this cache to Hive support; we should move it to the SQL core module so that users can benefit from it without Hive support. This also reduces the size of `HiveMetastoreCatalog`, making it easier to remove eventually.

Main changes:
1. Move the table relation cache to `SessionCatalog`.
2. `SessionCatalog.lookupRelation` now returns `SimpleCatalogRelation`, and the analyzer converts it to `LogicalRelation` or `MetastoreRelation` later, so `HiveSessionCatalog` no longer needs to override `lookupRelation`.
3. `FindDataSourceTable` reads from and writes to the table relation cache.

## How was this patch tested?

Existing tests.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #16621 from cloud-fan/plan-cache.
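For readers skimming the diff below, here is a minimal sketch of what such a general table relation cache could look like in the SQL core module. The names `TableRelationCache` and `getOrElseUpdate` are illustrative assumptions, not the exact API introduced by this patch; only the idea of a bounded cache keyed by qualified table name comes from the description above.

```scala
import java.util.concurrent.Callable

import com.google.common.cache.{Cache, CacheBuilder}

// Hypothetical sketch: a bounded cache of resolved table plans, keyed by
// database + table name, living in SQL core so it works without Hive.
case class QualifiedTableName(database: String, name: String)

class TableRelationCache[P <: AnyRef](maximumSize: Long) {
  private val cache: Cache[QualifiedTableName, P] =
    CacheBuilder.newBuilder()
      .maximumSize(maximumSize) // bound the cache; Guava evicts old entries
      .build[QualifiedTableName, P]()

  // Return the cached plan for `key`, computing and caching it on a miss.
  // A rule like FindDataSourceTable would call this during analysis.
  def getOrElseUpdate(key: QualifiedTableName, compute: () => P): P =
    cache.get(key, new Callable[P] { override def call(): P = compute() })

  // Drop a stale entry, e.g. after DROP TABLE or ALTER TABLE.
  def invalidate(key: QualifiedTableName): Unit = cache.invalidate(key)
}
```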
Diffstat (limited to 'sql/hive/src/test/scala/org')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala | 22
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala           |  8
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala   | 17
3 files changed, 35 insertions, 12 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 081f6f6d82..f0e2c9369b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -1322,4 +1322,26 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
sparkSession.sparkContext.conf.set(DEBUG_MODE, previousValue)
}
}
+
+ test("SPARK-18464: support old table which doesn't store schema in table properties") {
+ withTable("old") {
+ withTempPath { path =>
+ Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath)
+ val tableDesc = CatalogTable(
+ identifier = TableIdentifier("old", Some("default")),
+ tableType = CatalogTableType.EXTERNAL,
+ storage = CatalogStorageFormat.empty.copy(
+ properties = Map("path" -> path.getAbsolutePath)
+ ),
+ schema = new StructType(),
+ properties = Map(
+ HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet"))
+ hiveClient.createTable(tableDesc, ignoreIfExists = false)
+
+ checkAnswer(spark.table("old"), Row(1, "a"))
+
+ checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", "string", null) :: Nil)
+ }
+ }
+ }
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 0053aa1642..e2fcd2fd41 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -62,7 +62,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
spark.conf.set(SQLConf.ENABLE_FALL_BACK_TO_HDFS_FOR_STATS.key, true)
- val relation = spark.sessionState.catalog.lookupRelation(TableIdentifier("csv_table"))
+ val relation = spark.table("csv_table").queryExecution.analyzed.children.head
.asInstanceOf[MetastoreRelation]
val properties = relation.hiveQlTable.getParameters
@@ -80,7 +80,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
test("analyze MetastoreRelations") {
def queryTotalSize(tableName: String): BigInt =
- spark.sessionState.catalog.lookupRelation(TableIdentifier(tableName)).stats(conf).sizeInBytes
+ spark.table(tableName).queryExecution.analyzed.stats(conf).sizeInBytes
// Non-partitioned table
sql("CREATE TABLE analyzeTable (key STRING, value STRING)").collect()
@@ -451,7 +451,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS")
}
// Table lookup will make the table cached.
- catalog.lookupRelation(tableIndent)
+ spark.table(tableIndent)
statsBeforeUpdate = catalog.getCachedDataSourceTable(tableIndent)
.asInstanceOf[LogicalRelation].catalogTable.get.stats.get
@@ -461,7 +461,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
} else {
sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS")
}
- catalog.lookupRelation(tableIndent)
+ spark.table(tableIndent)
statsAfterUpdate = catalog.getCachedDataSourceTable(tableIndent)
.asInstanceOf[LogicalRelation].catalogTable.get.stats.get
}
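The pattern recurring in the StatisticsSuite hunks above replaces direct catalog lookups (`catalog.lookupRelation(...)`) with resolution through the analyzer, since `lookupRelation` now returns an unresolved `SimpleCatalogRelation`. A minimal sketch of the new lookup style these tests use:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Resolve a table through the full analyzer, so that post-lookup rules
// (e.g. FindDataSourceTable, or the Hive relation conversion rules) have
// already replaced the raw catalog relation with a concrete one.
def resolvedPlan(spark: SparkSession, tableName: String): LogicalPlan =
  spark.table(tableName).queryExecution.analyzed
```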
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 953e29127f..104b5250b6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, FunctionRegistry, NoSuchPartitionException}
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}
@@ -513,8 +514,12 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
isDataSourceTable: Boolean,
format: String,
userSpecifiedLocation: Option[String] = None): Unit = {
- val relation = EliminateSubqueryAliases(
- sessionState.catalog.lookupRelation(TableIdentifier(tableName)))
+ var relation: LogicalPlan = null
+ withSQLConf(
+ HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false",
+ HiveUtils.CONVERT_METASTORE_ORC.key -> "false") {
+ relation = EliminateSubqueryAliases(spark.table(tableName).queryExecution.analyzed)
+ }
val catalogTable =
sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
relation match {
@@ -1021,13 +1026,11 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
// generates an invalid query plan.
val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}"""))
read.json(rdd).createOrReplaceTempView("data")
- val originalConf = sessionState.conf.convertCTAS
- setConf(SQLConf.CONVERT_CTAS, false)
- try {
+ withSQLConf(SQLConf.CONVERT_CTAS.key -> "false") {
sql("CREATE TABLE explodeTest (key bigInt)")
table("explodeTest").queryExecution.analyzed match {
- case metastoreRelation: MetastoreRelation => // OK
+ case SubqueryAlias(_, r: MetastoreRelation, _) => // OK
case _ =>
fail("To correctly test the fix of SPARK-5875, explodeTest should be a MetastoreRelation")
}
@@ -1040,8 +1043,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
sql("DROP TABLE explodeTest")
dropTempTable("data")
- } finally {
- setConf(SQLConf.CONVERT_CTAS, originalConf)
}
}
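The final hunks also swap a manual save/set/restore of `SQLConf.CONVERT_CTAS` for the `withSQLConf` test helper. Below is a simplified sketch of that pattern, assuming a `spark: SparkSession` in scope; it is not the exact helper from `SQLTestUtils`, just an illustration of the set-run-restore idea.

```scala
import org.apache.spark.sql.SparkSession

// Simplified sketch of the withSQLConf pattern: set the given SQL conf
// entries, run the block, and restore the previous values even on failure.
def withSQLConf(spark: SparkSession)(pairs: (String, String)*)(block: => Unit): Unit = {
  val conf = spark.sessionState.conf
  // Remember each key's previous value (None if it was unset) before overriding.
  val originals = pairs.map { case (key, _) =>
    key -> (if (conf.contains(key)) Some(conf.getConfString(key)) else None)
  }
  pairs.foreach { case (key, value) => conf.setConfString(key, value) }
  try block finally originals.foreach {
    case (key, Some(old)) => conf.setConfString(key, old)
    case (key, None)      => conf.unsetConf(key)
  }
}
```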