diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-07-31 18:18:53 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-07-31 18:18:53 -0700 |
commit | 301fb0d7236eb55d53c9cd60804a2d755b4ad3b2 (patch) | |
tree | 51383907d4c442831fac687f61711ba198949561 /sql/catalyst/src/main/scala | |
parent | 064d91ff7342002414d3274694a8e2e37f154986 (diff) | |
download | spark-301fb0d7236eb55d53c9cd60804a2d755b4ad3b2.tar.gz spark-301fb0d7236eb55d53c9cd60804a2d755b4ad3b2.tar.bz2 spark-301fb0d7236eb55d53c9cd60804a2d755b4ad3b2.zip |
[SPARK-16731][SQL] use StructType in CatalogTable and remove CatalogColumn
## What changes were proposed in this pull request?
`StructField` has very similar semantic with `CatalogColumn`, except that `CatalogColumn` use string to express data type. I think it's reasonable to use `StructType` as the `CatalogTable.schema` and remove `CatalogColumn`.
## How was this patch tested?
existing tests.
Author: Wenchen Fan <wenchen@databricks.com>
Closes #14363 from cloud-fan/column.
Diffstat (limited to 'sql/catalyst/src/main/scala')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 9 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala | 50 |
2 files changed, 14 insertions, 45 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index e36241a436..980efda6cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -259,14 +259,7 @@ class SessionCatalog( identifier = tid, tableType = CatalogTableType.VIEW, storage = CatalogStorageFormat.empty, - schema = tempTables(table).output.map { c => - CatalogColumn( - name = c.name, - dataType = c.dataType.catalogString, - nullable = c.nullable, - comment = Option(c.name) - ) - }, + schema = tempTables(table).output.toStructType, properties = Map(), viewText = None) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 710bce5da9..38f0bc2c4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.catalog import java.util.Date -import javax.annotation.Nullable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} @@ -26,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.types.StructType /** @@ -78,28 +78,6 @@ object CatalogStorageFormat { } /** - * A column in a table. - */ -case class CatalogColumn( - name: String, - // TODO: make this type-safe; this is left as a string due to issues in converting Hive - // varchars to and from SparkSQL strings. - dataType: String, - nullable: Boolean = true, - comment: Option[String] = None) { - - override def toString: String = { - val output = - Seq(s"`$name`", - dataType, - if (!nullable) "NOT NULL" else "", - comment.map("(" + _ + ")").getOrElse("")) - output.filter(_.nonEmpty).mkString(" ") - } - -} - -/** * A partition (Hive style) defined in the catalog. * * @param spec partition spec values indexed by column name @@ -141,7 +119,7 @@ case class CatalogTable( identifier: TableIdentifier, tableType: CatalogTableType, storage: CatalogStorageFormat, - schema: Seq[CatalogColumn], + schema: StructType, partitionColumnNames: Seq[String] = Seq.empty, bucketSpec: Option[BucketSpec] = None, owner: String = "", @@ -163,9 +141,10 @@ case class CatalogTable( requireSubsetOfSchema(bucketSpec.map(_.sortColumnNames).getOrElse(Nil), "sort") requireSubsetOfSchema(bucketSpec.map(_.bucketColumnNames).getOrElse(Nil), "bucket") - /** Columns this table is partitioned by. */ - def partitionColumns: Seq[CatalogColumn] = - schema.filter { c => partitionColumnNames.contains(c.name) } + /** schema of this table's partition columns */ + def partitionSchema: StructType = StructType(schema.filter { + c => partitionColumnNames.contains(c.name) + }) /** Return the database this table was specified to belong to, assuming it exists. */ def database: String = identifier.database.getOrElse { @@ -277,16 +256,13 @@ case class SimpleCatalogRelation( override lazy val resolved: Boolean = false override val output: Seq[Attribute] = { - val cols = catalogTable.schema - .filter { c => !catalogTable.partitionColumnNames.contains(c.name) } - (cols ++ catalogTable.partitionColumns).map { f => - AttributeReference( - f.name, - CatalystSqlParser.parseDataType(f.dataType), - // Since data can be dumped in randomly with no validation, everything is nullable. - nullable = true - )(qualifier = Some(metadata.identifier.table)) - } + val (partCols, dataCols) = metadata.schema.toAttributes + // Since data can be dumped in randomly with no validation, everything is nullable. + .map(_.withNullability(true).withQualifier(Some(metadata.identifier.table))) + .partition { a => + metadata.partitionColumnNames.contains(a.name) + } + dataCols ++ partCols } require( |