diff options
author | gatorsmile <gatorsmile@gmail.com> | 2016-06-29 19:36:21 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-06-29 19:36:21 +0800 |
commit | 7ee9e39cb43c43d69dfe8035106f7556886e60b1 (patch) | |
tree | 60c85bc0da0c145a2b3d27a580c194a305186762 /sql/catalyst/src/main | |
parent | d1e8108854deba3de8e2d87eb4389d11fb17ee57 (diff) | |
download | spark-7ee9e39cb43c43d69dfe8035106f7556886e60b1.tar.gz spark-7ee9e39cb43c43d69dfe8035106f7556886e60b1.tar.bz2 spark-7ee9e39cb43c43d69dfe8035106f7556886e60b1.zip |
[SPARK-16157][SQL] Add New Methods for comments in StructField and StructType
#### What changes were proposed in this pull request?
Based on the previous discussion with cloud-fan hvanhovell in another related PR https://github.com/apache/spark/pull/13764#discussion_r67994276, it looks reasonable to add convenience methods for users to add `comment` when defining `StructField`.
Currently, the column-related `comment` attribute is stored in `Metadata` of `StructField`. For example, users can add the `comment` attribute using the following way:
```Scala
StructType(
StructField(
"cl1",
IntegerType,
nullable = false,
new MetadataBuilder().putString("comment", "test").build()) :: Nil)
```
This PR is to add more user friendly methods for the `comment` attribute when defining a `StructField`. After the changes, users are provided three different ways to do it:
```Scala
val struct = (new StructType)
.add("a", "int", true, "test1")
val struct = (new StructType)
.add("c", StringType, true, "test3")
val struct = (new StructType)
.add(StructField("d", StringType).withComment("test4"))
```
#### How was this patch tested?
Added test cases:
- `DataTypeSuite` is for testing three types of API changes,
- `DataFrameReaderWriterSuite` is for parquet, json and csv formats - using in-memory catalog
- `OrcQuerySuite.scala` is for orc format using Hive-metastore
Author: gatorsmile <gatorsmile@gmail.com>
Closes #13860 from gatorsmile/newMethodForComment.
Diffstat (limited to 'sql/catalyst/src/main')
3 files changed, 55 insertions, 8 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c7420a1c59..f2cc8d3624 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1430,13 +1430,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { */ override def visitColType(ctx: ColTypeContext): StructField = withOrigin(ctx) { import ctx._ - - // Add the comment to the metadata. - val builder = new MetadataBuilder - if (STRING != null) { - builder.putString("comment", string(STRING)) - } - - StructField(identifier.getText, typedVisit(dataType), nullable = true, builder.build()) + val structField = StructField(identifier.getText, typedVisit(dataType), nullable = true) + if (STRING == null) structField else structField.withComment(string(STRING)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index 83570a5eae..cb8bf61696 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -51,4 +51,22 @@ case class StructField( ("nullable" -> nullable) ~ ("metadata" -> metadata.jsonValue) } + + /** + * Updates the StructField with a new comment value. + */ + def withComment(comment: String): StructField = { + val newMetadata = new MetadataBuilder() + .withMetadata(metadata) + .putString("comment", comment) + .build() + copy(metadata = newMetadata) + } + + /** + * Return the comment of this StructField. + */ + def getComment(): Option[String] = { + if (metadata.contains("comment")) Option(metadata.getString("comment")) else None + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index 436512ff69..0e89f71dc1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -171,6 +171,23 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru } /** + * Creates a new [[StructType]] by adding a new field and specifying metadata. + * {{{ + * val struct = (new StructType) + * .add("a", IntegerType, true, "comment1") + * .add("b", LongType, false, "comment2") + * .add("c", StringType, true, "comment3") + * }}} + */ + def add( + name: String, + dataType: DataType, + nullable: Boolean, + comment: String): StructType = { + StructType(fields :+ StructField(name, dataType, nullable).withComment(comment)) + } + + /** * Creates a new [[StructType]] by adding a new nullable field with no metadata where the * dataType is specified as a String. * @@ -219,6 +236,24 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru } /** + * Creates a new [[StructType]] by adding a new field and specifying metadata where the + * dataType is specified as a String. + * {{{ + * val struct = (new StructType) + * .add("a", "int", true, "comment1") + * .add("b", "long", false, "comment2") + * .add("c", "string", true, "comment3") + * }}} + */ + def add( + name: String, + dataType: String, + nullable: Boolean, + comment: String): StructType = { + add(name, CatalystSqlParser.parseDataType(dataType), nullable, comment) + } + + /** * Extracts the [[StructField]] with the given name. * * @throws IllegalArgumentException if a field with the given name does not exist |