[SPARK-18911][SQL] Define CatalogStatistics to interact with metastore and convert it to Statistics in relations

## What changes were proposed in this pull request?

Statistics in LogicalPlan should use attributes to refer to columns rather than column names, because two columns from two relations can have the same column name. But CatalogTable doesn't have the concepts of attributes or broadcast hints that Statistics carries. Therefore, putting Statistics in CatalogTable is confusing. We define a separate statistics structure for CatalogTable (CatalogStatistics), which is only responsible for interacting with the metastore and is converted to Statistics in LogicalPlan when it is used.

## How was this patch tested?

Added test cases.

Author: wangzhenhua <wangzhenhua@huawei.com>
Author: Zhenhua Wang <wzh_zju@163.com>

Closes #16323 from wzhfy/nameToAttr.
author: wangzhenhua <wangzhenhua@huawei.com> 2016-12-24 15:34:44 +0800
committer: Wenchen Fan <wenchen@databricks.com> 2016-12-24 15:34:44 +0800
commit: 3cff8161578b65139c9740fed694d4b3c81fa74a (patch)
tree: b957978656c6e855216fcd3964febceadf2379cc /sql/hive/src/test
parent: a848f0ba84e37fd95d0f47863ec68326e3296b33 (diff)
download: spark-3cff8161578b65139c9740fed694d4b3c81fa74a.tar.gz
spark-3cff8161578b65139c9740fed694d4b3c81fa74a.tar.bz2
spark-3cff8161578b65139c9740fed694d4b3c81fa74a.zip
1 files changed, 7 insertions, 6 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 5ae202fdc9..8803ea36de 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -23,7 +23,7 @@ import scala.reflect.ClassTag
 
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.plans.logical.Statistics
+import org.apache.spark.sql.catalyst.catalog.CatalogStatistics
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.sql.execution.joins._
@@ -152,7 +152,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
   }
 
   private def checkTableStats(
-      stats: Option[Statistics],
+      stats: Option[CatalogStatistics],
       hasSizeInBytes: Boolean,
       expectedRowCounts: Option[Int]): Unit = {
     if (hasSizeInBytes || expectedRowCounts.nonEmpty) {
@@ -168,7 +168,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
       tableName: String,
       isDataSourceTable: Boolean,
       hasSizeInBytes: Boolean,
-      expectedRowCounts: Option[Int]): Option[Statistics] = {
+      expectedRowCounts: Option[Int]): Option[CatalogStatistics] = {
     val df = sql(s"SELECT * FROM $tableName")
     val stats = df.queryExecution.analyzed.collect {
       case rel: MetastoreRelation =>
@@ -435,10 +435,11 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
   }
 
   /** Used to test refreshing cached metadata once table stats are updated. */
-  private def getStatsBeforeAfterUpdate(isAnalyzeColumns: Boolean): (Statistics, Statistics) = {
+  private def getStatsBeforeAfterUpdate(isAnalyzeColumns: Boolean)
+    : (CatalogStatistics, CatalogStatistics) = {
     val tableName = "tbl"
-    var statsBeforeUpdate: Statistics = null
-    var statsAfterUpdate: Statistics = null
+    var statsBeforeUpdate: CatalogStatistics = null
+    var statsAfterUpdate: CatalogStatistics = null
     withTable(tableName) {
       val tableIndent = TableIdentifier(tableName, Some("default"))
       val catalog = spark.sessionState.catalog.asInstanceOf[HiveSessionCatalog]
author	wangzhenhua <wangzhenhua@huawei.com>	2016-12-24 15:34:44 +0800
committer	Wenchen Fan <wenchen@databricks.com>	2016-12-24 15:34:44 +0800
commit	3cff8161578b65139c9740fed694d4b3c81fa74a (patch)
tree	b957978656c6e855216fcd3964febceadf2379cc /sql/hive/src/test
parent	a848f0ba84e37fd95d0f47863ec68326e3296b33 (diff)
download	spark-3cff8161578b65139c9740fed694d4b3c81fa74a.tar.gz spark-3cff8161578b65139c9740fed694d4b3c81fa74a.tar.bz2 spark-3cff8161578b65139c9740fed694d4b3c81fa74a.zip