author     Takeshi Yamamuro <yamamuro@apache.org>    2017-03-29 12:37:49 -0700
committer  Xiao Li <gatorsmile@gmail.com>            2017-03-29 12:37:49 -0700
commit     c4008480b781379ac0451b9220300d83c054c60d (patch)
tree       59931ecbbe483b9c963eeec0472e770d41d22d6f /sql/catalyst
parent     142f6d14928c780cc9e8d6d7749c5d7c08a30972 (diff)
[SPARK-20009][SQL] Support DDL strings for defining schema in functions.from_json
## What changes were proposed in this pull request?

This PR adds `StructType.fromDDL` to convert a DDL-formatted string into a `StructType`, for defining schemas in `functions.from_json`.

## How was this patch tested?

Added tests in `JsonFunctionsSuite`.

Author: Takeshi Yamamuro <yamamuro@apache.org>

Closes #17406 from maropu/SPARK-20009.
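For context, a minimal usage sketch of what this enables (not part of the patch; `spark`, the app name, and the local master are hypothetical):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.from_json
import org.apache.spark.sql.types.StructType

// Hypothetical demo session; any existing SparkSession works as well.
val spark = SparkSession.builder().master("local[*]").appName("fromDDL-demo").getOrCreate()
import spark.implicits._

// Parse a DDL-formatted string into a StructType and use it as the
// schema for from_json.
val schema = StructType.fromDDL("a INT, b STRING")
val df = Seq("""{"a": 1, "b": "x"}""").toDF("json")
df.select(from_json($"json", schema).as("parsed")).show()
```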
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala    |  6
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala | 85
2 files changed, 70 insertions, 21 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 8d8b5b86d5..54006e20a3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -417,6 +417,12 @@ object StructType extends AbstractDataType {
}
}
+ /**
+ * Creates StructType for a given DDL-formatted string, which is a comma separated list of field
+ * definitions, e.g., a INT, b STRING.
+ */
+ def fromDDL(ddl: String): StructType = CatalystSqlParser.parseTableSchema(ddl)
+
def apply(fields: Seq[StructField]): StructType = StructType(fields.toArray)
def apply(fields: java.util.List[StructField]): StructType = {
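Viewed in isolation, the new method simply delegates to the Catalyst SQL parser. A small hypothetical sketch of its behavior (plain `==` holds here because both sides default to nullable fields with empty metadata):

```scala
import org.apache.spark.sql.types._

// fromDDL parses a comma-separated list of field definitions.
val parsed = StructType.fromDDL("a INT, b STRING")
// Equivalent to building the same schema programmatically.
val expected = new StructType().add("a", IntegerType).add("b", StringType)
assert(parsed == expected)
```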
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 61e1ec7c7a..05cb999af6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -169,30 +169,72 @@ class DataTypeSuite extends SparkFunSuite {
assert(!arrayType.existsRecursively(_.isInstanceOf[IntegerType]))
}
- def checkDataTypeJsonRepr(dataType: DataType): Unit = {
- test(s"JSON - $dataType") {
+ def checkDataTypeFromJson(dataType: DataType): Unit = {
+ test(s"from Json - $dataType") {
assert(DataType.fromJson(dataType.json) === dataType)
}
}
- checkDataTypeJsonRepr(NullType)
- checkDataTypeJsonRepr(BooleanType)
- checkDataTypeJsonRepr(ByteType)
- checkDataTypeJsonRepr(ShortType)
- checkDataTypeJsonRepr(IntegerType)
- checkDataTypeJsonRepr(LongType)
- checkDataTypeJsonRepr(FloatType)
- checkDataTypeJsonRepr(DoubleType)
- checkDataTypeJsonRepr(DecimalType(10, 5))
- checkDataTypeJsonRepr(DecimalType.SYSTEM_DEFAULT)
- checkDataTypeJsonRepr(DateType)
- checkDataTypeJsonRepr(TimestampType)
- checkDataTypeJsonRepr(StringType)
- checkDataTypeJsonRepr(BinaryType)
- checkDataTypeJsonRepr(ArrayType(DoubleType, true))
- checkDataTypeJsonRepr(ArrayType(StringType, false))
- checkDataTypeJsonRepr(MapType(IntegerType, StringType, true))
- checkDataTypeJsonRepr(MapType(IntegerType, ArrayType(DoubleType), false))
+ def checkDataTypeFromDDL(dataType: DataType): Unit = {
+ test(s"from DDL - $dataType") {
+ val parsed = StructType.fromDDL(s"a ${dataType.sql}")
+ val expected = new StructType().add("a", dataType)
+ assert(parsed.sameType(expected))
+ }
+ }
+
+ checkDataTypeFromJson(NullType)
+
+ checkDataTypeFromJson(BooleanType)
+ checkDataTypeFromDDL(BooleanType)
+
+ checkDataTypeFromJson(ByteType)
+ checkDataTypeFromDDL(ByteType)
+
+ checkDataTypeFromJson(ShortType)
+ checkDataTypeFromDDL(ShortType)
+
+ checkDataTypeFromJson(IntegerType)
+ checkDataTypeFromDDL(IntegerType)
+
+ checkDataTypeFromJson(LongType)
+ checkDataTypeFromDDL(LongType)
+
+ checkDataTypeFromJson(FloatType)
+ checkDataTypeFromDDL(FloatType)
+
+ checkDataTypeFromJson(DoubleType)
+ checkDataTypeFromDDL(DoubleType)
+
+ checkDataTypeFromJson(DecimalType(10, 5))
+ checkDataTypeFromDDL(DecimalType(10, 5))
+
+ checkDataTypeFromJson(DecimalType.SYSTEM_DEFAULT)
+ checkDataTypeFromDDL(DecimalType.SYSTEM_DEFAULT)
+
+ checkDataTypeFromJson(DateType)
+ checkDataTypeFromDDL(DateType)
+
+ checkDataTypeFromJson(TimestampType)
+ checkDataTypeFromDDL(TimestampType)
+
+ checkDataTypeFromJson(StringType)
+ checkDataTypeFromDDL(StringType)
+
+ checkDataTypeFromJson(BinaryType)
+ checkDataTypeFromDDL(BinaryType)
+
+ checkDataTypeFromJson(ArrayType(DoubleType, true))
+ checkDataTypeFromDDL(ArrayType(DoubleType, true))
+
+ checkDataTypeFromJson(ArrayType(StringType, false))
+ checkDataTypeFromDDL(ArrayType(StringType, false))
+
+ checkDataTypeFromJson(MapType(IntegerType, StringType, true))
+ checkDataTypeFromDDL(MapType(IntegerType, StringType, true))
+
+ checkDataTypeFromJson(MapType(IntegerType, ArrayType(DoubleType), false))
+ checkDataTypeFromDDL(MapType(IntegerType, ArrayType(DoubleType), false))
val metadata = new MetadataBuilder()
.putString("name", "age")
@@ -201,7 +243,8 @@ class DataTypeSuite extends SparkFunSuite {
StructField("a", IntegerType, nullable = true),
StructField("b", ArrayType(DoubleType), nullable = false),
StructField("c", DoubleType, nullable = false, metadata)))
- checkDataTypeJsonRepr(structType)
+ checkDataTypeFromJson(structType)
+ checkDataTypeFromDDL(structType)
def checkDefaultSize(dataType: DataType, expectedDefaultSize: Int): Unit = {
test(s"Check the default size of $dataType") {