diff options
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala | 34 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala | 4 |
2 files changed, 33 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 5b5378c09e..24d75ab02c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -21,8 +21,8 @@ import java.util.Date import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Cast, Literal} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types.{StructField, StructType} @@ -171,7 +171,7 @@ case class CatalogTable( createTime: Long = System.currentTimeMillis, lastAccessTime: Long = -1, properties: Map[String, String] = Map.empty, - stats: Option[Statistics] = None, + stats: Option[CatalogStatistics] = None, viewOriginalText: Option[String] = None, viewText: Option[String] = None, comment: Option[String] = None, @@ -247,6 +247,34 @@ case class CatalogTable( } +/** + * This class of statistics is used in [[CatalogTable]] to interact with metastore. + * We define this new class instead of directly using [[Statistics]] here because there are no + * concepts of attributes or broadcast hint in catalog. + */ +case class CatalogStatistics( + sizeInBytes: BigInt, + rowCount: Option[BigInt] = None, + colStats: Map[String, ColumnStat] = Map.empty) { + + /** + * Convert [[CatalogStatistics]] to [[Statistics]], and match column stats to attributes based + * on column names. + */ + def toPlanStats(planOutput: Seq[Attribute]): Statistics = { + val matched = planOutput.flatMap(a => colStats.get(a.name).map(a -> _)) + Statistics(sizeInBytes = sizeInBytes, rowCount = rowCount, + attributeStats = AttributeMap(matched)) + } + + /** Readable string representation for the CatalogStatistics. */ + def simpleString: String = { + val rowCountString = if (rowCount.isDefined) s", ${rowCount.get} rows" else "" + s"$sizeInBytes bytes$rowCountString" + } +} + + case class CatalogTableType private(name: String) object CatalogTableType { val EXTERNAL = new CatalogTableType("EXTERNAL") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala index 465fbab571..91404d4bb8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala @@ -41,13 +41,13 @@ import org.apache.spark.sql.types._ * @param sizeInBytes Physical size in bytes. For leaf operators this defaults to 1, otherwise it * defaults to the product of children's `sizeInBytes`. * @param rowCount Estimated number of rows. - * @param colStats Column-level statistics. + * @param attributeStats Statistics for Attributes. * @param isBroadcastable If true, output is small enough to be used in a broadcast join. */ case class Statistics( sizeInBytes: BigInt, rowCount: Option[BigInt] = None, - colStats: Map[String, ColumnStat] = Map.empty, + attributeStats: AttributeMap[ColumnStat] = AttributeMap(Nil), isBroadcastable: Boolean = false) { override def toString: String = "Statistics(" + simpleString + ")" |