From d060da098aa0449f519fb22c3ed8f75f87ba5f12 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Fri, 22 Apr 2016 11:28:46 -0700 Subject: [SPARK-14762] [SQL] TPCDS Q90 fails to parse ### What changes were proposed in this pull request? TPCDS Q90 fails to parse because it uses a reserved keyword as an identifier; `AT` was used as an alias for one of the subqueries. `AT` is not a reserved keyword and should have been registered in the `nonReserved` rule. In order to prevent this from happening again I have added tests for all keywords that are non-reserved in Hive. See the `nonReserved`, `sql11ReservedKeywordsUsedAsCastFunctionName` & `sql11ReservedKeywordsUsedAsIdentifier` rules in https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g. ### How was this patch tested? Added tests for all Hive non-reserved keywords to `TableIdentifierParserSuite`. cc davies Author: Herman van Hovell Closes #12537 from hvanhovell/SPARK-14762. --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 7 +-- .../parser/TableIdentifierParserSuite.scala | 50 ++++++++++++++++++++-- 2 files changed, 51 insertions(+), 6 deletions(-) (limited to 'sql') diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index db453aaa6d..1908cea673 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -650,11 +650,14 @@ nonReserved | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST | AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT - | INPUTDRIVER | OUTPUTDRIVER | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE + | 
DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE | STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER | REVOKE | GRANT | LOCK | UNLOCK | MSCK | REPAIR | EXPORT | IMPORT | LOAD | VALUES | COMMENT | ROLE | ROLES | COMPACTIONS | PRINCIPALS | TRANSACTIONS | INDEX | INDEXES | LOCKS | OPTION | LOCAL | INPATH | ASC | DESC | LIMIT | RENAME | SETS + | AT | NULLS | OVERWRITE | ALL | ALTER | AS | BETWEEN | BY | CREATE | DELETE + | DESCRIBE | DROP | EXISTS | FALSE | FOR | GROUP | IN | INSERT | INTO | IS |LIKE + | NULL | ORDER | OUTER | TABLE | TRUE | WITH | RLIKE ; SELECT: 'SELECT'; @@ -850,8 +853,6 @@ SORTED: 'SORTED'; PURGE: 'PURGE'; INPUTFORMAT: 'INPUTFORMAT'; OUTPUTFORMAT: 'OUTPUTFORMAT'; -INPUTDRIVER: 'INPUTDRIVER'; -OUTPUTDRIVER: 'OUTPUTDRIVER'; DATABASE: 'DATABASE' | 'SCHEMA'; DATABASES: 'DATABASES' | 'SCHEMAS'; DFS: 'DFS'; diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 297b1931a9..bef7d38f1a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -22,21 +22,65 @@ import org.apache.spark.sql.catalyst.TableIdentifier class TableIdentifierParserSuite extends SparkFunSuite { import CatalystSqlParser._ + // Add "$elem$", "$value$" & "$key$" + val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before", + "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection", + "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data", + "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited", + "dependency", "desc", "directories", "directory", "disable", "distribute", + "enable", "escaped", 
"exclusive", "explain", "export", "fields", "file", "fileformat", "first", + "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index", + "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last", + "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin", + "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls", + "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned", + "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly", + "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace", + "replication", "restrict", "rewrite", "role", "roles", "schemas", "second", + "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed", + "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables", + "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive", + "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp", + "view", "while", "year", "work", "transaction", "write", "isolation", "level", + "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint", + "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp", + "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external", + "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in", + "insert", "int", "into", "is", "lateral", "like", "local", "none", "null", + "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke", + "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", + "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike", + "bigint", "binary", 
"boolean", "current_date", "current_timestamp", "date", "double", "float", + "int", "smallint", "timestamp", "at") + + val hiveNonReservedRegression = Seq("left", "right", "left", "right", "full", "inner", "semi", + "union", "except", "intersect", "schema", "database") + test("table identifier") { // Regular names. assert(TableIdentifier("q") === parseTableIdentifier("q")) assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q")) // Illegal names. - intercept[ParseException](parseTableIdentifier("")) - intercept[ParseException](parseTableIdentifier("d.q.g")) + Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier => + intercept[ParseException](parseTableIdentifier(identifier)) + } + } + test("table identifier - keywords") { // SQL Keywords. - val keywords = Seq("select", "from", "where", "left", "right") + val keywords = Seq("select", "from", "where") ++ hiveNonReservedRegression keywords.foreach { keyword => intercept[ParseException](parseTableIdentifier(keyword)) assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) } } + + test("table identifier - non reserved keywords") { + // Hive keywords are allowed. + hiveNonReservedKeyword.foreach { nonReserved => + assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved)) + } + } } -- cgit v1.2.3