1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.parser
import scala.language.implicitConversions
import scala.util.matching.Regex
import scala.util.parsing.combinator.syntactical.StandardTokenParsers
import org.apache.spark.sql.types._
/**
* This is a data type parser that can be used to parse string representations of data types
* provided in SQL queries. This parser is mixed in with DDLParser and SqlParser.
*/
private[sql] trait DataTypeParser extends StandardTokenParsers {
// This is used to create a parser from a regex. We are using regexes for data type strings
// since these strings can be also used as column names or field names.
import lexical.Identifier
implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
s"identifier matching regex ${regex}",
{ case Identifier(str) if regex.unapplySeq(str).isDefined => str }
)
protected lazy val primitiveType: Parser[DataType] =
"(?i)string".r ^^^ StringType |
"(?i)float".r ^^^ FloatType |
"(?i)(?:int|integer)".r ^^^ IntegerType |
"(?i)tinyint".r ^^^ ByteType |
"(?i)smallint".r ^^^ ShortType |
"(?i)double".r ^^^ DoubleType |
"(?i)(?:bigint|long)".r ^^^ LongType |
"(?i)binary".r ^^^ BinaryType |
"(?i)boolean".r ^^^ BooleanType |
fixedDecimalType |
"(?i)decimal".r ^^^ DecimalType.USER_DEFAULT |
"(?i)date".r ^^^ DateType |
"(?i)timestamp".r ^^^ TimestampType |
varchar |
char
protected lazy val fixedDecimalType: Parser[DataType] =
("(?i)decimal".r ~> "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
case precision ~ scale =>
DecimalType(precision.toInt, scale.toInt)
}
protected lazy val char: Parser[DataType] =
"(?i)char".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
protected lazy val varchar: Parser[DataType] =
"(?i)varchar".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
protected lazy val arrayType: Parser[DataType] =
"(?i)array".r ~> "<" ~> dataType <~ ">" ^^ {
case tpe => ArrayType(tpe)
}
protected lazy val mapType: Parser[DataType] =
"(?i)map".r ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
case t1 ~ _ ~ t2 => MapType(t1, t2)
}
protected lazy val structField: Parser[StructField] =
ident ~ ":" ~ dataType ^^ {
case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
}
protected lazy val structType: Parser[DataType] =
("(?i)struct".r ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
case fields => new StructType(fields.toArray)
}) |
("(?i)struct".r ~ "<>" ^^^ StructType(Nil))
protected lazy val dataType: Parser[DataType] =
arrayType |
mapType |
structType |
primitiveType
def toDataType(dataTypeString: String): DataType = synchronized {
phrase(dataType)(new lexical.Scanner(dataTypeString)) match {
case Success(result, _) => result
case failure: NoSuccess => throw new DataTypeException(failMessage(dataTypeString))
}
}
private def failMessage(dataTypeString: String): String = {
s"Unsupported dataType: $dataTypeString. If you have a struct and a field name of it has " +
"any special characters, please use backticks (`) to quote that field name, e.g. `x+y`. " +
"Please note that backtick itself is not supported in a field name."
}
}
private[sql] object DataTypeParser {
lazy val dataTypeParser = new DataTypeParser {
override val lexical = new SqlLexical
}
def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString)
}
/** The exception thrown from the [[DataTypeParser]]. */
private[sql] class DataTypeException(message: String) extends Exception(message)
|