From d060da098aa0449f519fb22c3ed8f75f87ba5f12 Mon Sep 17 00:00:00 2001
From: Herman van Hovell <hvanhovell@questtec.nl>
Date: Fri, 22 Apr 2016 11:28:46 -0700
Subject: [SPARK-14762] [SQL] TPCDS Q90 fails to parse

### What changes were proposed in this pull request?
TPCDS Q90 fails to parse because the parser treats `AT` as a reserved keyword, while the query uses `AT` as an alias for one of its subqueries. `AT` is not a reserved keyword and should have been registered in the `nonReserved` rule.

In order to prevent this from happening again I have added tests for all keywords that are non-reserved in Hive. See the `nonReserved`, `sql11ReservedKeywordsUsedAsCastFunctionName` & `sql11ReservedKeywordsUsedAsIdentifier` rules in https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g.

### How was this patch tested?

Added tests for all Hive non-reserved keywords to `TableIdentifierParserSuite`.

cc davies

Author: Herman van Hovell <hvanhovell@questtec.nl>

Closes #12537 from hvanhovell/SPARK-14762.
---
 .../apache/spark/sql/catalyst/parser/SqlBase.g4    |  7 +--
 .../parser/TableIdentifierParserSuite.scala        | 50 ++++++++++++++++++++--
 2 files changed, 51 insertions(+), 6 deletions(-)

(limited to 'sql')

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index db453aaa6d..1908cea673 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -650,11 +650,14 @@ nonReserved
     | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
     | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST
     | AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
-    | INPUTDRIVER | OUTPUTDRIVER | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE
+    | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE
     | STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER
     | REVOKE | GRANT | LOCK | UNLOCK | MSCK | REPAIR | EXPORT | IMPORT | LOAD | VALUES | COMMENT | ROLE
     | ROLES | COMPACTIONS | PRINCIPALS | TRANSACTIONS | INDEX | INDEXES | LOCKS | OPTION | LOCAL | INPATH
     | ASC | DESC | LIMIT | RENAME | SETS
+    | AT | NULLS | OVERWRITE | ALL | ALTER | AS | BETWEEN | BY | CREATE | DELETE
+    | DESCRIBE | DROP | EXISTS | FALSE | FOR | GROUP | IN | INSERT | INTO | IS |LIKE
+    | NULL | ORDER | OUTER | TABLE | TRUE | WITH | RLIKE
     ;
 
 SELECT: 'SELECT';
@@ -850,8 +853,6 @@ SORTED: 'SORTED';
 PURGE: 'PURGE';
 INPUTFORMAT: 'INPUTFORMAT';
 OUTPUTFORMAT: 'OUTPUTFORMAT';
-INPUTDRIVER: 'INPUTDRIVER';
-OUTPUTDRIVER: 'OUTPUTDRIVER';
 DATABASE: 'DATABASE' | 'SCHEMA';
 DATABASES: 'DATABASES' | 'SCHEMAS';
 DFS: 'DFS';
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index 297b1931a9..bef7d38f1a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -22,21 +22,65 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 class TableIdentifierParserSuite extends SparkFunSuite {
   import CatalystSqlParser._
 
+  // TODO: Add "$elem$", "$value$" & "$key$" (not yet covered by the list below).
+  val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before",
+    "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
+    "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data",
+    "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
+    "dependency", "desc", "directories", "directory", "disable", "distribute",
+    "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
+    "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
+    "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
+    "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
+    "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
+    "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
+    "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
+    "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
+    "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
+    "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
+    "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
+    "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
+    "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
+    "view", "while", "year", "work", "transaction", "write", "isolation", "level",
+    "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint",
+    "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
+    "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
+    "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
+    "insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
+    "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
+    "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
+    "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
+    "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
+    "int", "smallint", "timestamp", "at")
+
+  val hiveNonReservedRegression = Seq("left", "right", "full", "inner", "semi",
+    "union", "except", "intersect", "schema", "database")
+
   test("table identifier") {
     // Regular names.
     assert(TableIdentifier("q") === parseTableIdentifier("q"))
     assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))
 
     // Illegal names.
-    intercept[ParseException](parseTableIdentifier(""))
-    intercept[ParseException](parseTableIdentifier("d.q.g"))
+    Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
+      intercept[ParseException](parseTableIdentifier(identifier))
+    }
+  }
 
+  test("table identifier - keywords") {
     // SQL Keywords.
-    val keywords = Seq("select", "from", "where", "left", "right")
+    val keywords = Seq("select", "from", "where") ++ hiveNonReservedRegression
     keywords.foreach { keyword =>
       intercept[ParseException](parseTableIdentifier(keyword))
       assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
       assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
     }
   }
+
+  test("table identifier - non reserved keywords") {
+    // Hive keywords are allowed.
+    hiveNonReservedKeyword.foreach { nonReserved =>
+      assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
+    }
+  }
 }
-- 
cgit v1.2.3