author    Yin Huai <yhuai@databricks.com>    2016-04-29 22:49:12 -0700
committer Reynold Xin <rxin@databricks.com>    2016-04-29 22:49:12 -0700
commit    ac41fc648de584f08863313fbac0c5bb6fc6a65e (patch)
tree      f72da33e155967bda250166f9083940bbe32c845 /sql/catalyst/src
parent    7945f9f6d431453a192bea66f66fec813913e4c8 (diff)
[SPARK-14591][SQL] Remove DataTypeParser and add more keywords to the nonReserved list.
## What changes were proposed in this pull request?

CatalystSqlParser can already parse data types, so we do not need a separate DataTypeParser.

## How was this patch tested?

Existing tests.

Author: Yin Huai <yhuai@databricks.com>

Closes #12796 from yhuai/removeDataTypeParser.
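For illustration, a minimal sketch (not part of this commit) of the consolidated entry point the diff below switches to:

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types._

// Parse a SQL type string into a Catalyst DataType.
val dt: DataType = CatalystSqlParser.parseDataType("struct<x: int, y: array<string>>")
// dt == StructType(Seq(
//   StructField("x", IntegerType),
//   StructField("y", ArrayType(StringType))))
```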
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala | 4
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala | 186
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala | 8
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala | 37
4 files changed, 14 insertions(+), 221 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 5efaf8f201..3851e4c706 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -22,7 +22,7 @@ import javax.annotation.Nullable
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
-import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan}
@@ -189,7 +189,7 @@ case class SimpleCatalogRelation(
(cols ++ catalogTable.partitionColumns).map { f =>
AttributeReference(
f.name,
- DataTypeParser.parse(f.dataType),
+ CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything is nullable.
nullable = true
)(qualifier = Some(alias.getOrElse(metadata.identifier.table)))
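The swap above routes catalog column type strings through the single Catalyst parser. A hedged sketch of the effect (the column name and type string below are hypothetical):

```scala
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Hypothetical catalog column: a name plus a type string from table metadata.
val (name, typeString) = ("price", "decimal(10,2)")

val attr = AttributeReference(
  name,
  CatalystSqlParser.parseDataType(typeString),  // previously DataTypeParser.parse(typeString)
  nullable = true)()
```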
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
deleted file mode 100644
index 0eb13c600c..0000000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeParser.scala
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.parser
-
-import scala.language.implicitConversions
-import scala.util.matching.Regex
-import scala.util.parsing.combinator.syntactical.StandardTokenParsers
-import scala.util.parsing.input.CharArrayReader._
-
-import org.apache.spark.sql.types._
-
-/**
- * This is a data type parser that can be used to parse string representations of data types
- * provided in SQL queries. This parser is mixed in with DDLParser and SqlParser.
- */
-private[sql] trait DataTypeParser extends StandardTokenParsers {
-
- // This is used to create a parser from a regex. We are using regexes for data type strings
- // since these strings can be also used as column names or field names.
- import lexical.Identifier
- implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
- s"identifier matching regex ${regex}",
- { case Identifier(str) if regex.unapplySeq(str).isDefined => str }
- )
-
- protected lazy val primitiveType: Parser[DataType] =
- "(?i)string".r ^^^ StringType |
- "(?i)float".r ^^^ FloatType |
- "(?i)(?:int|integer)".r ^^^ IntegerType |
- "(?i)tinyint".r ^^^ ByteType |
- "(?i)smallint".r ^^^ ShortType |
- "(?i)double".r ^^^ DoubleType |
- "(?i)(?:bigint|long)".r ^^^ LongType |
- "(?i)binary".r ^^^ BinaryType |
- "(?i)boolean".r ^^^ BooleanType |
- fixedDecimalType |
- "(?i)decimal".r ^^^ DecimalType.USER_DEFAULT |
- "(?i)date".r ^^^ DateType |
- "(?i)timestamp".r ^^^ TimestampType |
- varchar |
- char
-
- protected lazy val fixedDecimalType: Parser[DataType] =
- ("(?i)decimal".r ~> "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
- case precision ~ scale =>
- DecimalType(precision.toInt, scale.toInt)
- }
-
- protected lazy val char: Parser[DataType] =
- "(?i)char".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
-
- protected lazy val varchar: Parser[DataType] =
- "(?i)varchar".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType
-
- protected lazy val arrayType: Parser[DataType] =
- "(?i)array".r ~> "<" ~> dataType <~ ">" ^^ {
- case tpe => ArrayType(tpe)
- }
-
- protected lazy val mapType: Parser[DataType] =
- "(?i)map".r ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
- case t1 ~ _ ~ t2 => MapType(t1, t2)
- }
-
- protected lazy val structField: Parser[StructField] =
- ident ~ ":" ~ dataType ^^ {
- case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
- }
-
- protected lazy val structType: Parser[DataType] =
- ("(?i)struct".r ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
- case fields => new StructType(fields.toArray)
- }) |
- ("(?i)struct".r ~ "<>" ^^^ StructType(Nil))
-
- protected lazy val dataType: Parser[DataType] =
- arrayType |
- mapType |
- structType |
- primitiveType
-
- def toDataType(dataTypeString: String): DataType = synchronized {
- phrase(dataType)(new lexical.Scanner(dataTypeString)) match {
- case Success(result, _) => result
- case failure: NoSuccess => throw new DataTypeException(failMessage(dataTypeString))
- }
- }
-
- private def failMessage(dataTypeString: String): String = {
- s"Unsupported dataType: $dataTypeString. If you have a struct and a field name of it has " +
- "any special characters, please use backticks (`) to quote that field name, e.g. `x+y`. " +
- "Please note that backtick itself is not supported in a field name."
- }
-}
-
-private[sql] object DataTypeParser {
- lazy val dataTypeParser = new DataTypeParser {
- override val lexical = new SqlLexical
- }
-
- def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString)
-}
-
-/** The exception thrown from the [[DataTypeParser]]. */
-private[sql] class DataTypeException(message: String) extends Exception(message)
-
-class SqlLexical extends scala.util.parsing.combinator.lexical.StdLexical {
- case class DecimalLit(chars: String) extends Token {
- override def toString: String = chars
- }
-
- /* This is a workaround to support the lazy setting */
- def initialize(keywords: Seq[String]): Unit = {
- reserved.clear()
- reserved ++= keywords
- }
-
- /* Normalize the keyword string */
- def normalizeKeyword(str: String): String = str.toLowerCase
-
- delimiters += (
- "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", "!>", ">=", "!<", ">", "/", "(", ")",
- ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
- )
-
- protected override def processIdent(name: String) = {
- val token = normalizeKeyword(name)
- if (reserved contains token) Keyword(token) else Identifier(name)
- }
-
- override lazy val token: Parser[Token] =
- ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) }
- | '.' ~> (rep1(digit) ~ scientificNotation) ^^
- { case i ~ s => DecimalLit("0." + i.mkString + s) }
- | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^
- { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) }
- | digit.* ~ identChar ~ (identChar | digit).* ^^
- { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
- | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
- case i ~ None => NumericLit(i.mkString)
- case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString)
- }
- | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
- { case chars => StringLit(chars mkString "") }
- | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
- { case chars => StringLit(chars mkString "") }
- | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
- { case chars => Identifier(chars mkString "") }
- | EofCh ^^^ EOF
- | '\'' ~> failure("unclosed string literal")
- | '"' ~> failure("unclosed string literal")
- | delim
- | failure("illegal character")
- )
-
- override def identChar: Parser[Elem] = letter | elem('_')
-
- private lazy val scientificNotation: Parser[String] =
- (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ {
- case s ~ rest => "e" + s.mkString + rest.mkString
- }
-
- override def whitespace: Parser[Any] =
- ( whitespaceChar
- | '/' ~ '*' ~ comment
- | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
- | '#' ~ chrExcept(EofCh, '\n').*
- | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
- | '/' ~ '*' ~ failure("unclosed comment")
- ).*
-}
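The implementation deleted above was built on Scala's parser combinators (StandardTokenParsers). For readers unfamiliar with that style, a minimal self-contained sketch (TinyTypeParser is hypothetical; assumes the scala-parser-combinators library is on the classpath):

```scala
import scala.util.parsing.combinator.syntactical.StandardTokenParsers

// A toy parser in the style of the removed DataTypeParser: keyword strings
// become Parser[String] via StandardTokenParsers' implicit `keyword` conversion.
// (The real parser matched keywords case-insensitively via regexes; this toy
// is case-sensitive.)
object TinyTypeParser extends StandardTokenParsers {
  lexical.reserved ++= Seq("int", "string", "array")
  lexical.delimiters ++= Seq("<", ">")

  def primitive: Parser[String] = "int" | "string"
  def arrayType: Parser[String] =
    "array" ~> "<" ~> tpe <~ ">" ^^ (inner => s"array<$inner>")
  def tpe: Parser[String] = arrayType | primitive

  def parse(s: String): String =
    phrase(tpe)(new lexical.Scanner(s)) match {
      case Success(result, _) => result
      case failure: NoSuccess => sys.error(failure.msg)
    }
}

// TinyTypeParser.parse("array<array<int>>")  // returns "array<array<int>>"
```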
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index 6f50be7a99..b06aa7bc52 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
import org.apache.spark.SparkException
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering}
-import org.apache.spark.sql.catalyst.parser.{DataTypeParser, LegacyTypeStringParser}
+import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser}
import org.apache.spark.sql.catalyst.util.quoteIdentifier
/**
@@ -169,7 +169,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
* }}}
*/
def add(name: String, dataType: String): StructType = {
- add(name, DataTypeParser.parse(dataType), nullable = true, Metadata.empty)
+ add(name, CatalystSqlParser.parseDataType(dataType), nullable = true, Metadata.empty)
}
/**
@@ -184,7 +184,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
* }}}
*/
def add(name: String, dataType: String, nullable: Boolean): StructType = {
- add(name, DataTypeParser.parse(dataType), nullable, Metadata.empty)
+ add(name, CatalystSqlParser.parseDataType(dataType), nullable, Metadata.empty)
}
/**
@@ -202,7 +202,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
dataType: String,
nullable: Boolean,
metadata: Metadata): StructType = {
- add(name, DataTypeParser.parse(dataType), nullable, metadata)
+ add(name, CatalystSqlParser.parseDataType(dataType), nullable, metadata)
}
/**
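With the change above, the string-typed `add` overloads delegate to `CatalystSqlParser.parseDataType`. A small usage sketch (values illustrative):

```scala
import org.apache.spark.sql.types._

// The string-typed overloads now parse via CatalystSqlParser.parseDataType.
val schema = new StructType()
  .add("id", "bigint", nullable = false)
  .add("tags", "array<string>")

// schema.treeString prints:
// root
//  |-- id: long (nullable = false)
//  |-- tags: array (nullable = true)
//  |    |-- element: string (containsNull = true)
```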
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index 07b89cb61f..40782978a7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -20,9 +20,9 @@ package org.apache.spark.sql.catalyst.parser
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types._
-abstract class AbstractDataTypeParserSuite extends SparkFunSuite {
+class CatalystQlDataTypeParserSuite extends SparkFunSuite {
- def parse(sql: String): DataType
+ def parse(sql: String): DataType = CatalystSqlParser.parseDataType(sql)
def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
test(s"parse ${dataTypeString.replace("\n", "")}") {
@@ -30,7 +30,8 @@ abstract class AbstractDataTypeParserSuite extends SparkFunSuite {
}
}
- def intercept(sql: String)
+ def intercept(sql: String): Unit =
+ intercept[ParseException](CatalystSqlParser.parseDataType(sql))
def unsupported(dataTypeString: String): Unit = {
test(s"$dataTypeString is not supported") {
@@ -115,38 +116,16 @@ abstract class AbstractDataTypeParserSuite extends SparkFunSuite {
unsupported("it is not a data type")
unsupported("struct<x+y: int, 1.1:timestamp>")
unsupported("struct<x: int")
-}
-
-class DataTypeParserSuite extends AbstractDataTypeParserSuite {
- override def intercept(sql: String): Unit =
- intercept[DataTypeException](DataTypeParser.parse(sql))
- override def parse(sql: String): DataType =
- DataTypeParser.parse(sql)
-
- // A column name can be a reserved word in our DDL parser and SqlParser.
+ // DataType parser accepts certain reserved keywords.
checkDataType(
- "Struct<TABLE: string, CASE:boolean>",
+ "Struct<TABLE: string, DATE:boolean>",
StructType(
StructField("TABLE", StringType, true) ::
- StructField("CASE", BooleanType, true) :: Nil)
+ StructField("DATE", BooleanType, true) :: Nil)
)
- unsupported("struct<x int, y string>")
-
- unsupported("struct<`x``y` int>")
-}
-
-class CatalystQlDataTypeParserSuite extends AbstractDataTypeParserSuite {
- override def intercept(sql: String): Unit =
- intercept[ParseException](CatalystSqlParser.parseDataType(sql))
-
- override def parse(sql: String): DataType =
- CatalystSqlParser.parseDataType(sql)
-
- // A column name can be a reserved word in our DDL parser and SqlParser.
- unsupported("Struct<TABLE: string, CASE:boolean>")
-
+ // Define struct columns without ':'
checkDataType(
"struct<x int, y string>",
(new StructType).add("x", IntegerType).add("y", StringType))
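The helpers in the merged suite compose directly; hypothetical extra cases in the same style:

```scala
// Hypothetical additional cases, written with this suite's helpers:
checkDataType("map<string, array<int>>", MapType(StringType, ArrayType(IntegerType)))
checkDataType("decimal(10, 2)", DecimalType(10, 2))

// An unterminated type string should fail with a ParseException.
unsupported("array<int")
```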