author     gatorsmile <gatorsmile@gmail.com>      2016-06-29 19:36:21 +0800
committer  Wenchen Fan <wenchen@databricks.com>   2016-06-29 19:36:21 +0800
commit     7ee9e39cb43c43d69dfe8035106f7556886e60b1 (patch)
tree       60c85bc0da0c145a2b3d27a580c194a305186762 /sql/hive/src/test
parent     d1e8108854deba3de8e2d87eb4389d11fb17ee57 (diff)
[SPARK-16157][SQL] Add New Methods for comments in StructField and StructType
#### What changes were proposed in this pull request?

Based on the previous discussion with cloud-fan and hvanhovell in another related PR (https://github.com/apache/spark/pull/13764#discussion_r67994276), it looks reasonable to add convenience methods for users to add a `comment` when defining a `StructField`. Currently, the column-related `comment` attribute is stored in the `Metadata` of a `StructField`. For example, users can add the `comment` attribute in the following way:

```Scala
StructType(
  StructField(
    "cl1",
    IntegerType,
    nullable = false,
    new MetadataBuilder().putString("comment", "test").build()) :: Nil)
```

This PR adds more user-friendly methods for the `comment` attribute when defining a `StructField`. After the changes, users have three different ways to do it:

```Scala
val struct = (new StructType)
  .add("a", "int", true, "test1")

val struct = (new StructType)
  .add("c", StringType, true, "test3")

val struct = (new StructType)
  .add(StructField("d", StringType).withComment("test4"))
```

#### How was this patch tested?

Added test cases:
- `DataTypeSuite` tests the three API changes
- `DataFrameReaderWriterSuite` covers the parquet, json and csv formats, using the in-memory catalog
- `OrcQuerySuite.scala` covers the orc format, using the Hive metastore

Author: gatorsmile <gatorsmile@gmail.com>

Closes #13860 from gatorsmile/newMethodForComment.
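As an aside (not part of this commit), here is a minimal sketch of how the three styles shown above relate to the underlying `Metadata` storage that the description mentions, assuming a Spark build with this patch applied; the field name and comment text are illustrative:

```Scala
import org.apache.spark.sql.types._

// Three ways to attach a comment to a field; per the PR description, all of
// them store it under the "comment" key of the field's Metadata.
val viaAdd = (new StructType)
  .add("cl1", IntegerType, nullable = false, comment = "test")

val viaWithComment = StructType(
  StructField("cl1", IntegerType, nullable = false).withComment("test") :: Nil)

val viaMetadata = StructType(
  StructField("cl1", IntegerType, nullable = false,
    new MetadataBuilder().putString("comment", "test").build()) :: Nil)

// Read the comment back through the field's Metadata.
Seq(viaAdd, viaWithComment, viaMetadata).foreach { schema =>
  val field = schema("cl1")
  assert(field.metadata.getString("comment") == "test")
}
```

This same `Metadata` entry is what the ORC test added below surfaces through `DESCRIBE TABLE`.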
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala | 22
1 file changed, 22 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index cd41da7214..4a86987e29 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{IntegerType, StructType}
case class AllDataTypesWithNonPrimitiveType(
stringField: String,
@@ -462,4 +463,25 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
}
}
}
+
+ test("column nullability and comment - write and then read") {
+ val schema = (new StructType)
+ .add("cl1", IntegerType, nullable = false, comment = "test")
+ .add("cl2", IntegerType, nullable = true)
+ .add("cl3", IntegerType, nullable = true)
+ val row = Row(3, null, 4)
+ val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema)
+
+ val tableName = "tab"
+ withTable(tableName) {
+ df.write.format("orc").mode("overwrite").saveAsTable(tableName)
+ // Verify the DDL command result: DESCRIBE TABLE
+ checkAnswer(
+ sql(s"desc $tableName").select("col_name", "comment").where($"comment" === "test"),
+ Row("cl1", "test") :: Nil)
+ // Verify the schema
+ val expectedFields = schema.fields.map(f => f.copy(nullable = true))
+ assert(spark.table(tableName).schema == schema.copy(fields = expectedFields))
+ }
+ }
}