aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@questtec.nl>2016-04-22 11:28:46 -0700
committerDavies Liu <davies.liu@gmail.com>2016-04-22 11:28:46 -0700
commitd060da098aa0449f519fb22c3ed8f75f87ba5f12 (patch)
tree4dbc23e2e132035b14b48ce7dc4e888352c6871b /sql
parent1a7fc74ccf1b98d929aa4b2ab45c24d4c3d42c1a (diff)
downloadspark-d060da098aa0449f519fb22c3ed8f75f87ba5f12.tar.gz
spark-d060da098aa0449f519fb22c3ed8f75f87ba5f12.tar.bz2
spark-d060da098aa0449f519fb22c3ed8f75f87ba5f12.zip
[SPARK-14762] [SQL] TPCDS Q90 fails to parse
### What changes were proposed in this pull request? TPCDS Q90 fails to parse because it uses a reserved keyword as an Identifier; `AT` was used as an alias for one of the subqueries. `AT` is not a reserved keyword and should have been registered in the `nonReserved` rule. In order to prevent this from happening again I have added tests for all keywords that are non-reserved in Hive. See the `nonReserved`, `sql11ReservedKeywordsUsedAsCastFunctionName` & `sql11ReservedKeywordsUsedAsIdentifier` rules in https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g. ### How was this patch tested? Added tests for all Hive non-reserved keywords to `TableIdentifierParserSuite`. cc davies Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #12537 from hvanhovell/SPARK-14762.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 7
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala | 50
2 files changed, 51 insertions, 6 deletions
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index db453aaa6d..1908cea673 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -650,11 +650,14 @@ nonReserved
| SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
| EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST
| AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
- | INPUTDRIVER | OUTPUTDRIVER | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE
+ | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE
| STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER
| REVOKE | GRANT | LOCK | UNLOCK | MSCK | REPAIR | EXPORT | IMPORT | LOAD | VALUES | COMMENT | ROLE
| ROLES | COMPACTIONS | PRINCIPALS | TRANSACTIONS | INDEX | INDEXES | LOCKS | OPTION | LOCAL | INPATH
| ASC | DESC | LIMIT | RENAME | SETS
+ | AT | NULLS | OVERWRITE | ALL | ALTER | AS | BETWEEN | BY | CREATE | DELETE
+ | DESCRIBE | DROP | EXISTS | FALSE | FOR | GROUP | IN | INSERT | INTO | IS |LIKE
+ | NULL | ORDER | OUTER | TABLE | TRUE | WITH | RLIKE
;
SELECT: 'SELECT';
@@ -850,8 +853,6 @@ SORTED: 'SORTED';
PURGE: 'PURGE';
INPUTFORMAT: 'INPUTFORMAT';
OUTPUTFORMAT: 'OUTPUTFORMAT';
-INPUTDRIVER: 'INPUTDRIVER';
-OUTPUTDRIVER: 'OUTPUTDRIVER';
DATABASE: 'DATABASE' | 'SCHEMA';
DATABASES: 'DATABASES' | 'SCHEMAS';
DFS: 'DFS';
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index 297b1931a9..bef7d38f1a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -22,21 +22,65 @@ import org.apache.spark.sql.catalyst.TableIdentifier
class TableIdentifierParserSuite extends SparkFunSuite {
import CatalystSqlParser._
+ // Add "$elem$", "$value$" & "$key$"
+ val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before",
+ "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
+ "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data",
+ "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
+ "dependency", "desc", "directories", "directory", "disable", "distribute",
+ "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
+ "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
+ "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
+ "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
+ "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
+ "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
+ "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
+ "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
+ "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
+ "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
+ "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
+ "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
+ "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
+ "view", "while", "year", "work", "transaction", "write", "isolation", "level",
+ "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint",
+ "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
+ "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
+ "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
+ "insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
+ "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
+ "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
+ "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
+ "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
+ "int", "smallint", "timestamp", "at")
+
+ val hiveNonReservedRegression = Seq("left", "right", "left", "right", "full", "inner", "semi",
+ "union", "except", "intersect", "schema", "database")
+
test("table identifier") {
// Regular names.
assert(TableIdentifier("q") === parseTableIdentifier("q"))
assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))
// Illegal names.
- intercept[ParseException](parseTableIdentifier(""))
- intercept[ParseException](parseTableIdentifier("d.q.g"))
+ Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
+ intercept[ParseException](parseTableIdentifier(identifier))
+ }
+ }
+ test("table identifier - keywords") {
// SQL Keywords.
- val keywords = Seq("select", "from", "where", "left", "right")
+ val keywords = Seq("select", "from", "where") ++ hiveNonReservedRegression
keywords.foreach { keyword =>
intercept[ParseException](parseTableIdentifier(keyword))
assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
}
}
+
+ test("table identifier - non reserved keywords") {
+ // Hive keywords are allowed.
+ hiveNonReservedKeyword.foreach { nonReserved =>
+ assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
+ }
+ }
}