path: root/sql/core/src
author     Yin Huai <yhuai@databricks.com>    2015-03-21 13:27:53 -0700
committer  Michael Armbrust <michael@databricks.com>    2015-03-21 13:27:53 -0700
commit     94a102acb80a7c77f57409ece1f8dbbba791b774 (patch)
tree       334a3c7028af526bfd1954e05eb4e1148cfdb8ab /sql/core/src
parent     ee569a0c7171d149eee52877def902378eaf695e (diff)
[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.
This PR creates a trait `DataTypeParser` used to parse data types. The trait aims to be the single place that provides the functionality of parsing a data type's string representation. It is currently mixed into `DDLParser` and `SqlParser`. It is also used to parse the data type argument of `DataFrame.cast` and to convert a Hive metastore data type string back to a `DataType`.

JIRA: https://issues.apache.org/jira/browse/SPARK-6250

Author: Yin Huai <yhuai@databricks.com>

Closes #5078 from yhuai/ddlKeywords and squashes the following commits:

0e66097 [Yin Huai] Special handle struct<>.
fea6012 [Yin Huai] Style.
c9733fb [Yin Huai] Create a trait to parse data types.
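For readers skimming the diffs below, here is a minimal self-contained sketch of the combinator-parser approach such a trait embodies. It is a hypothetical illustration, not the Spark source from this commit: the object name `TypeStringSketch`, the `kw` helper, and the reduced keyword set are assumptions, and it presumes `scala-parser-combinators` and Spark's `org.apache.spark.sql.types` classes are on the classpath.

import scala.util.parsing.combinator.RegexParsers
import org.apache.spark.sql.types._

// Hypothetical sketch, not the actual DataTypeParser trait added by this commit.
object TypeStringSketch extends RegexParsers {
  // Case-insensitive keyword helper (an assumption of this sketch).
  private def kw(s: String): Parser[String] = ("(?i)" + s).r

  protected lazy val primitiveType: Parser[DataType] =
    kw("string")  ^^^ StringType  |
    kw("boolean") ^^^ BooleanType |
    kw("bigint")  ^^^ LongType    |
    kw("int")     ^^^ IntegerType |
    kw("double")  ^^^ DoubleType  |
    fixedDecimalType |                        // decimal with precision/scale
    kw("decimal") ^^^ DecimalType.Unlimited   // decimal with no precision/scale

  protected lazy val fixedDecimalType: Parser[DataType] =
    (kw("decimal") ~ "(" ~> "\\d+".r) ~ ("," ~> "\\d+".r <~ ")") ^^ {
      case precision ~ scale => DecimalType(precision.toInt, scale.toInt)
    }

  protected lazy val arrayType: Parser[DataType] =
    kw("array") ~> "<" ~> dataType <~ ">" ^^ { t => ArrayType(t) }

  protected lazy val mapType: Parser[DataType] =
    kw("map") ~> "<" ~> (dataType <~ ",") ~ dataType <~ ">" ^^ {
      case k ~ v => MapType(k, v)
    }

  protected lazy val structField: Parser[StructField] =
    ("[a-zA-Z_][a-zA-Z0-9_]*".r <~ ":") ~ dataType ^^ {
      case name ~ t => StructField(name, t, nullable = true)
    }

  // repsep accepts zero fields, so "struct<>" already yields an empty StructType here.
  protected lazy val structType: Parser[DataType] =
    kw("struct") ~> "<" ~> repsep(structField, ",") <~ ">" ^^ { fs => StructType(fs) }

  protected lazy val dataType: Parser[DataType] =
    arrayType | mapType | structType | primitiveType

  def apply(input: String): DataType = parseAll(dataType, input) match {
    case Success(t, _) => t
    case failure       => sys.error(s"Unsupported data type string: $input ($failure)")
  }
}

In this sketch the empty `struct<>` case falls out of `repsep`; the actual trait special-cases it, per the squashed commits above.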
Diffstat (limited to 'sql/core/src')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Column.scala      | 15
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala | 80
2 files changed, 4 insertions(+), 91 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index b7a13a1b26..ec7d15f5bc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -624,20 +624,7 @@ class Column(protected[sql] val expr: Expression) {
*
* @group expr_ops
*/
- def cast(to: String): Column = cast(to.toLowerCase match {
- case "string" | "str" => StringType
- case "boolean" => BooleanType
- case "byte" => ByteType
- case "short" => ShortType
- case "int" => IntegerType
- case "long" => LongType
- case "float" => FloatType
- case "double" => DoubleType
- case "decimal" => DecimalType.Unlimited
- case "date" => DateType
- case "timestamp" => TimestampType
- case _ => throw new RuntimeException(s"""Unsupported cast type: "$to"""")
- })
+ def cast(to: String): Column = cast(DataTypeParser(to))
/**
* Returns an ordering used in sorting.
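With this change, `cast(to: String)` delegates to the shared parser instead of a hard-coded match, so type strings the old list rejected should now parse. A hedged usage sketch, assuming an existing DataFrame `df` with a string column `price` (both names hypothetical):

// The old match mapped "decimal" only to DecimalType.Unlimited; the
// parser-backed cast should also accept an explicit precision and scale.
val price = df("price").cast("decimal(10, 2)")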
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala
index d57406645e..d2e807d3a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala
@@ -34,7 +34,8 @@ import org.apache.spark.util.Utils
* A parser for foreign DDL commands.
*/
private[sql] class DDLParser(
- parseQuery: String => LogicalPlan) extends AbstractSparkSQLParser with Logging {
+ parseQuery: String => LogicalPlan)
+ extends AbstractSparkSQLParser with DataTypeParser with Logging {
def apply(input: String, exceptionOnError: Boolean): Option[LogicalPlan] = {
try {
@@ -46,14 +47,6 @@ private[sql] class DDLParser(
}
}
- def parseType(input: String): DataType = {
- lexical.initialize(reservedWords)
- phrase(dataType)(new lexical.Scanner(input)) match {
- case Success(r, x) => r
- case x => throw new DDLException(s"Unsupported dataType: $x")
- }
- }
-
// `Keyword` is a convention with AbstractSparkSQLParser, which scans all of the `Keyword`
// properties of the class via reflection at runtime to construct the SqlLexical object
protected val CREATE = Keyword("CREATE")
@@ -70,24 +63,6 @@ private[sql] class DDLParser(
protected val COMMENT = Keyword("COMMENT")
protected val REFRESH = Keyword("REFRESH")
- // Data types.
- protected val STRING = Keyword("STRING")
- protected val BINARY = Keyword("BINARY")
- protected val BOOLEAN = Keyword("BOOLEAN")
- protected val TINYINT = Keyword("TINYINT")
- protected val SMALLINT = Keyword("SMALLINT")
- protected val INT = Keyword("INT")
- protected val BIGINT = Keyword("BIGINT")
- protected val FLOAT = Keyword("FLOAT")
- protected val DOUBLE = Keyword("DOUBLE")
- protected val DECIMAL = Keyword("DECIMAL")
- protected val DATE = Keyword("DATE")
- protected val TIMESTAMP = Keyword("TIMESTAMP")
- protected val VARCHAR = Keyword("VARCHAR")
- protected val ARRAY = Keyword("ARRAY")
- protected val MAP = Keyword("MAP")
- protected val STRUCT = Keyword("STRUCT")
-
protected lazy val ddl: Parser[LogicalPlan] = createTable | describeTable | refreshTable
protected def start: Parser[LogicalPlan] = ddl
@@ -189,58 +164,9 @@ private[sql] class DDLParser(
new MetadataBuilder().putString(COMMENT.str.toLowerCase, comment).build()
case None => Metadata.empty
}
- StructField(columnName, typ, nullable = true, meta)
- }
-
- protected lazy val primitiveType: Parser[DataType] =
- STRING ^^^ StringType |
- BINARY ^^^ BinaryType |
- BOOLEAN ^^^ BooleanType |
- TINYINT ^^^ ByteType |
- SMALLINT ^^^ ShortType |
- INT ^^^ IntegerType |
- BIGINT ^^^ LongType |
- FLOAT ^^^ FloatType |
- DOUBLE ^^^ DoubleType |
- fixedDecimalType | // decimal with precision/scale
- DECIMAL ^^^ DecimalType.Unlimited | // decimal with no precision/scale
- DATE ^^^ DateType |
- TIMESTAMP ^^^ TimestampType |
- VARCHAR ~ "(" ~ numericLit ~ ")" ^^^ StringType
-
- protected lazy val fixedDecimalType: Parser[DataType] =
- (DECIMAL ~ "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
- case precision ~ scale => DecimalType(precision.toInt, scale.toInt)
- }
-
- protected lazy val arrayType: Parser[DataType] =
- ARRAY ~> "<" ~> dataType <~ ">" ^^ {
- case tpe => ArrayType(tpe)
- }
- protected lazy val mapType: Parser[DataType] =
- MAP ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
- case t1 ~ _ ~ t2 => MapType(t1, t2)
- }
-
- protected lazy val structField: Parser[StructField] =
- ident ~ ":" ~ dataType ^^ {
- case fieldName ~ _ ~ tpe => StructField(fieldName, tpe, nullable = true)
+ StructField(columnName, typ, nullable = true, meta)
}
-
- protected lazy val structType: Parser[DataType] =
- (STRUCT ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
- case fields => StructType(fields)
- }) |
- (STRUCT ~> "<>" ^^ {
- case fields => StructType(Nil)
- })
-
- private[sql] lazy val dataType: Parser[DataType] =
- arrayType |
- mapType |
- structType |
- primitiveType
}
private[sql] object ResolvedDataSource {
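All of the deleted keyword and type productions above now live in the shared trait, and the `DataTypeParser(to)` call in the `cast` change suggests a companion-style `apply` entry point. A hypothetical direct use of that entry point (the nested type string and the result rendering are assumptions):

// Parsing a nested type string through the entry point seen in the cast() change.
val t = DataTypeParser("struct<name:string,scores:array<double>>")
// t: a StructType with fields "name": StringType and "scores": ArrayType(DoubleType)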