aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-06-29 19:36:21 +0800
committerWenchen Fan <wenchen@databricks.com>2016-06-29 19:36:21 +0800
commit7ee9e39cb43c43d69dfe8035106f7556886e60b1 (patch)
tree60c85bc0da0c145a2b3d27a580c194a305186762 /sql/catalyst/src/main
parentd1e8108854deba3de8e2d87eb4389d11fb17ee57 (diff)
downloadspark-7ee9e39cb43c43d69dfe8035106f7556886e60b1.tar.gz
spark-7ee9e39cb43c43d69dfe8035106f7556886e60b1.tar.bz2
spark-7ee9e39cb43c43d69dfe8035106f7556886e60b1.zip
[SPARK-16157][SQL] Add New Methods for comments in StructField and StructType
#### What changes were proposed in this pull request?

Based on the previous discussion with cloud-fan and hvanhovell in a related PR (https://github.com/apache/spark/pull/13764#discussion_r67994276), it looks reasonable to add convenience methods that let users attach a `comment` when defining a `StructField`.

Currently, the column-related `comment` attribute is stored in the `Metadata` of a `StructField`. For example, users can add the `comment` attribute in the following way:

```Scala
StructType(
  StructField(
    "cl1",
    IntegerType,
    nullable = false,
    new MetadataBuilder().putString("comment", "test").build()) :: Nil)
```

This PR adds more user-friendly methods for setting the `comment` attribute when defining a `StructField`. After the changes, users have three different ways to do it:

```Scala
val struct = (new StructType)
  .add("a", "int", true, "test1")

val struct = (new StructType)
  .add("c", StringType, true, "test3")

val struct = (new StructType)
  .add(StructField("d", StringType).withComment("test4"))
```

#### How was this patch tested?

Added test cases:
- `DataTypeSuite` tests the three types of API changes.
- `DataFrameReaderWriterSuite` covers the parquet, json and csv formats, using the in-memory catalog.
- `OrcQuerySuite.scala` covers the orc format, using the Hive metastore.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #13860 from gatorsmile/newMethodForComment.
Diffstat (limited to 'sql/catalyst/src/main')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala10
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala18
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala35
3 files changed, 55 insertions, 8 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index c7420a1c59..f2cc8d3624 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1430,13 +1430,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
*/
override def visitColType(ctx: ColTypeContext): StructField = withOrigin(ctx) {
import ctx._
-
- // Add the comment to the metadata.
- val builder = new MetadataBuilder
- if (STRING != null) {
- builder.putString("comment", string(STRING))
- }
-
- StructField(identifier.getText, typedVisit(dataType), nullable = true, builder.build())
+ val structField = StructField(identifier.getText, typedVisit(dataType), nullable = true)
+ if (STRING == null) structField else structField.withComment(string(STRING))
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
index 83570a5eae..cb8bf61696 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
@@ -51,4 +51,22 @@ case class StructField(
("nullable" -> nullable) ~
("metadata" -> metadata.jsonValue)
}
+
+ /**
+ * Returns a copy of this StructField with its comment set to the given value.
+ */
+ def withComment(comment: String): StructField = {
+ val newMetadata = new MetadataBuilder()
+ .withMetadata(metadata)
+ .putString("comment", comment)
+ .build()
+ copy(metadata = newMetadata)
+ }
+
+ /**
+ * Returns the comment of this StructField, or None if its metadata has no comment.
+ */
+ def getComment(): Option[String] = {
+ if (metadata.contains("comment")) Option(metadata.getString("comment")) else None
+ }
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 436512ff69..0e89f71dc1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -171,6 +171,23 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
}
/**
+ * Creates a new [[StructType]] by adding a new field with the given comment.
+ * {{{
+ * val struct = (new StructType)
+ * .add("a", IntegerType, true, "comment1")
+ * .add("b", LongType, false, "comment2")
+ * .add("c", StringType, true, "comment3")
+ * }}}
+ */
+ def add(
+ name: String,
+ dataType: DataType,
+ nullable: Boolean,
+ comment: String): StructType = {
+ StructType(fields :+ StructField(name, dataType, nullable).withComment(comment))
+ }
+
+ /**
* Creates a new [[StructType]] by adding a new nullable field with no metadata where the
* dataType is specified as a String.
*
@@ -219,6 +236,24 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
}
/**
+ * Creates a new [[StructType]] by adding a new field with the given comment, where the
+ * dataType is specified as a String.
+ * {{{
+ * val struct = (new StructType)
+ * .add("a", "int", true, "comment1")
+ * .add("b", "long", false, "comment2")
+ * .add("c", "string", true, "comment3")
+ * }}}
+ */
+ def add(
+ name: String,
+ dataType: String,
+ nullable: Boolean,
+ comment: String): StructType = {
+ add(name, CatalystSqlParser.parseDataType(dataType), nullable, comment)
+ }
+
+ /**
* Extracts the [[StructField]] with the given name.
*
* @throws IllegalArgumentException if a field with the given name does not exist