path: root/sql/catalyst
author    Liang-Chi Hsieh <viirya@gmail.com>    2016-01-30 23:05:29 -0800
committer Reynold Xin <rxin@databricks.com>    2016-01-30 23:05:29 -0800
commit    0e6d92d042b0a2920d8df5959d5913ba0166a678 (patch)
tree      0af4073b133a335a7ef9c228e88ee8cf6aa4e456 /sql/catalyst
parent    a1303de0a0e9d0c80327977abf52a79e2aa95e1f (diff)
[SPARK-12689][SQL] Migrate DDL parsing to the newly absorbed parser
JIRA: https://issues.apache.org/jira/browse/SPARK-12689

DDLParser processes three commands: createTable, describeTable and refreshTable. This patch migrates the three commands to the newly absorbed parser.

Author: Liang-Chi Hsieh <viirya@gmail.com>
Author: Liang-Chi Hsieh <viirya@appier.com>

Closes #10723 from viirya/migrate-ddl-describe.
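As a minimal sketch, assuming CatalystQl's parsePlan is the entry point (the plan construction for these ASTs may live outside sql/catalyst and is not shown in this diffstat), the three migrated command shapes look like this:

    import org.apache.spark.sql.catalyst.CatalystQl

    object DdlMigrationSketch {
      def main(args: Array[String]): Unit = {
        val parser = new CatalystQl()
        Seq(
          // createTable, data-source flavor: USING ... OPTIONS
          "CREATE TEMPORARY TABLE t USING org.apache.spark.sql.json " +
            "OPTIONS (path '/tmp/t.json')",
          // describeTable
          "DESCRIBE t",
          // refreshTable
          "REFRESH TABLE t"
        ).foreach(sql => println(parser.parsePlan(sql)))
      }
    }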
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g |  14
-rw-r--r--  sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g    |   4
-rw-r--r--  sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g   |  80
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala           |  23
4 files changed, 105 insertions, 16 deletions
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g
index 0555a6ba83..c162c1a0c5 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g
@@ -493,6 +493,16 @@ descFuncNames
| functionIdentifier
;
+// We are allowed to use From and To in a CreateTableUsing command's options
+// (in fact, it seems any string can be used as an option key). But we can't
+// simply add them to nonReserved, because doing so would break other existing
+// rules. So we introduce looseIdentifier and looseNonReserved here.
+looseIdentifier
+ :
+ Identifier
+ | looseNonReserved -> Identifier[$looseNonReserved.text]
+ // If the parser decides to support SQL11 reserved keywords, i.e.,
+ // useSQL11ReservedKeywordsForIdentifier() == false, the sql11 keywords
+ // in existing q tests will NOT be added back.
+ | {useSQL11ReservedKeywordsForIdentifier()}? sql11ReservedKeywordsUsedAsIdentifier -> Identifier[$sql11ReservedKeywordsUsedAsIdentifier.text]
+ ;
+
identifier
:
Identifier
@@ -516,6 +526,10 @@ principalIdentifier
| QuotedIdentifier
;
+looseNonReserved
+ : nonReserved | KW_FROM | KW_TO
+ ;
+
//The new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old version of nonReserved
//Non reserved keywords are basically the keywords that can be used as identifiers.
//All the KW_* are automatically not only keywords, but also reserved keywords.
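The practical effect of looseIdentifier/looseNonReserved: FROM and TO, though reserved elsewhere, become legal OPTIONS keys. A sketch under the same parsePlan assumption as above:

    // Without looseIdentifier, `from` and `to` would be rejected as
    // reserved keywords in the option-key position.
    val sql =
      "CREATE TEMPORARY TABLE partitioned USING org.apache.spark.sql.json " +
        "OPTIONS (from '1', to '10', path '/tmp/json')"
    new org.apache.spark.sql.catalyst.CatalystQl().parsePlan(sql)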
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
index 4374cd7ef7..e930caa291 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
@@ -324,6 +324,8 @@ KW_ISOLATION: 'ISOLATION';
KW_LEVEL: 'LEVEL';
KW_SNAPSHOT: 'SNAPSHOT';
KW_AUTOCOMMIT: 'AUTOCOMMIT';
+KW_REFRESH: 'REFRESH';
+KW_OPTIONS: 'OPTIONS';
KW_WEEK: 'WEEK'|'WEEKS';
KW_MILLISECOND: 'MILLISECOND'|'MILLISECONDS';
KW_MICROSECOND: 'MICROSECOND'|'MICROSECONDS';
@@ -470,7 +472,7 @@ Identifier
fragment
QuotedIdentifier
:
- '`' ( '``' | ~('`') )* '`' { setText(getText().substring(1, getText().length() -1 ).replaceAll("``", "`")); }
+ '`' ( '``' | ~('`') )* '`' { setText(getText().replaceAll("``", "`")); }
;
WS : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}
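This lexer change pairs with the escapedIdentifier regex change in CatalystQl.scala further down: the lexer now only collapses doubled backticks and keeps the delimiters, and cleanIdentifier strips the outer pair afterwards. A self-contained sketch of the combined behavior (helper names are illustrative; the regex is the one from this diff):

    object QuotedIdentifierSketch {
      // Lexer action after this patch: collapse ``, keep the delimiters.
      def lexQuotedIdentifier(token: String): String =
        token.replaceAll("``", "`")

      // Stripping as in CatalystQl after this patch; `.+` is greedy,
      // so backticks embedded by the unescaping step survive.
      private val escapedIdentifier = "`(.+)`".r
      def cleanIdentifier(ident: String): String = ident match {
        case escapedIdentifier(i) => i
        case other => other
      }

      def main(args: Array[String]): Unit = {
        val lexed = lexQuotedIdentifier("`a``b`") // "`a`b`"
        println(cleanIdentifier(lexed))           // prints: a`b
      }
    }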
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
index 35bef00351..6591f6b0f5 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
@@ -142,6 +142,7 @@ TOK_UNIONTYPE;
TOK_COLTYPELIST;
TOK_CREATEDATABASE;
TOK_CREATETABLE;
+TOK_CREATETABLEUSING;
TOK_TRUNCATETABLE;
TOK_CREATEINDEX;
TOK_CREATEINDEX_INDEXTBLNAME;
@@ -371,6 +372,10 @@ TOK_TXN_READ_WRITE;
TOK_COMMIT;
TOK_ROLLBACK;
TOK_SET_AUTOCOMMIT;
+TOK_REFRESHTABLE;
+TOK_TABLEPROVIDER;
+TOK_TABLEOPTIONS;
+TOK_TABLEOPTION;
TOK_CACHETABLE;
TOK_UNCACHETABLE;
TOK_CLEARCACHE;
@@ -660,6 +665,12 @@ import java.util.HashMap;
}
private char [] excludedCharForColumnName = {'.', ':'};
private boolean containExcludedCharForCreateTableColumnName(String input) {
+ if (input.length() > 0) {
+ if (input.charAt(0) == '`' && input.charAt(input.length() - 1) == '`') {
+ // When column name is backquoted, we don't care about excluded chars.
+ return false;
+ }
+ }
for(char c : excludedCharForColumnName) {
if(input.indexOf(c)>-1) {
return true;
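A Scala rendering of the guard added above, with two illustrative inputs (hypothetical helper mirroring the Java action, for illustration only):

    val excludedCharForColumnName = Seq('.', ':')

    def containExcludedCharForCreateTableColumnName(input: String): Boolean =
      if (input.nonEmpty && input.head == '`' && input.last == '`') {
        false // backquoted column names bypass the excluded-char check
      } else {
        excludedCharForColumnName.exists(c => input.contains(c))
      }

    containExcludedCharForCreateTableColumnName("a.b")   // true: rejected
    containExcludedCharForCreateTableColumnName("`a.b`") // false: allowed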
@@ -781,6 +792,7 @@ ddlStatement
| truncateTableStatement
| alterStatement
| descStatement
+ | refreshStatement
| showStatement
| metastoreCheck
| createViewStatement
@@ -907,12 +919,31 @@ createTableStatement
@init { pushMsg("create table statement", state); }
@after { popMsg(state); }
: KW_CREATE (temp=KW_TEMPORARY)? (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName
- ( like=KW_LIKE likeName=tableName
+ (
+ like=KW_LIKE likeName=tableName
tableRowFormat?
tableFileFormat?
tableLocation?
tablePropertiesPrefixed?
+ -> ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
+ ^(TOK_LIKETABLE $likeName?)
+ tableRowFormat?
+ tableFileFormat?
+ tableLocation?
+ tablePropertiesPrefixed?
+ )
+ |
+ tableProvider
+ tableOpts?
+ (KW_AS selectStatementWithCTE)?
+ -> ^(TOK_CREATETABLEUSING $name $temp? ifNotExists?
+ tableProvider
+ tableOpts?
+ selectStatementWithCTE?
+ )
| (LPAREN columnNameTypeList RPAREN)?
+ (p=tableProvider?)
+ tableOpts?
tableComment?
tablePartition?
tableBuckets?
@@ -922,8 +953,15 @@ createTableStatement
tableLocation?
tablePropertiesPrefixed?
(KW_AS selectStatementWithCTE)?
- )
- -> ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
+ -> {p != null}?
+ ^(TOK_CREATETABLEUSING $name $temp? ifNotExists?
+ columnNameTypeList?
+ $p
+ tableOpts?
+ selectStatementWithCTE?
+ )
+ ->
+ ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
^(TOK_LIKETABLE $likeName?)
columnNameTypeList?
tableComment?
@@ -935,7 +973,8 @@ createTableStatement
tableLocation?
tablePropertiesPrefixed?
selectStatementWithCTE?
- )
+ )
+ )
;
truncateTableStatement
@@ -1379,6 +1418,13 @@ tabPartColTypeExpr
: tableName partitionSpec? extColumnName? -> ^(TOK_TABTYPE tableName partitionSpec? extColumnName?)
;
+refreshStatement
+@init { pushMsg("refresh statement", state); }
+@after { popMsg(state); }
+ :
+ KW_REFRESH KW_TABLE tableName -> ^(TOK_REFRESHTABLE tableName)
+ ;
+
descStatement
@init { pushMsg("describe statement", state); }
@after { popMsg(state); }
@@ -1774,6 +1820,30 @@ showStmtIdentifier
| StringLiteral
;
+tableProvider
+@init { pushMsg("table's provider", state); }
+@after { popMsg(state); }
+ :
+ KW_USING Identifier (DOT Identifier)*
+ -> ^(TOK_TABLEPROVIDER Identifier+)
+ ;
+
+optionKeyValue
+@init { pushMsg("table's option specification", state); }
+@after { popMsg(state); }
+ :
+ (looseIdentifier (DOT looseIdentifier)*) StringLiteral
+ -> ^(TOK_TABLEOPTION looseIdentifier+ StringLiteral)
+ ;
+
+tableOpts
+@init { pushMsg("table's options", state); }
+@after { popMsg(state); }
+ :
+ KW_OPTIONS LPAREN optionKeyValue (COMMA optionKeyValue)* RPAREN
+ -> ^(TOK_TABLEOPTIONS optionKeyValue+)
+ ;
+
tableComment
@init { pushMsg("table's comment", state); }
@after { popMsg(state); }
@@ -2132,7 +2202,7 @@ structType
mapType
@init { pushMsg("map type", state); }
@after { popMsg(state); }
- : KW_MAP LESSTHAN left=primitiveType COMMA right=type GREATERTHAN
+ : KW_MAP LESSTHAN left=type COMMA right=type GREATERTHAN
-> ^(TOK_MAP $left $right)
;
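Taken together, the parser additions accept forms like the following (a sketch under the same parsePlan assumption; how these ASTs become logical plans is outside this diff):

    val parser = new org.apache.spark.sql.catalyst.CatalystQl()

    // USING branch with an explicit schema and OPTIONS.
    parser.parsePlan(
      "CREATE TABLE t (id int, name string) " +
        "USING org.apache.spark.sql.json OPTIONS (path '/tmp/t')")

    // CTAS through the USING branch.
    parser.parsePlan(
      "CREATE TEMPORARY TABLE t2 USING parquet OPTIONS (path '/tmp/t2') " +
        "AS SELECT * FROM t")

    // With the mapType change, map keys are no longer restricted to
    // primitive types, e.g. map<array<string>,int> now parses.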
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
index 536c292ab7..7ce2407913 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
@@ -140,6 +140,7 @@ private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends
case Token("TOK_BOOLEAN", Nil) => BooleanType
case Token("TOK_STRING", Nil) => StringType
case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
+ case Token("TOK_CHAR", Token(_, Nil) :: Nil) => StringType
case Token("TOK_FLOAT", Nil) => FloatType
case Token("TOK_DOUBLE", Nil) => DoubleType
case Token("TOK_DATE", Nil) => DateType
@@ -156,9 +157,10 @@ private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends
protected def nodeToStructField(node: ASTNode): StructField = node match {
case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
- StructField(fieldName, nodeToDataType(dataType), nullable = true)
- case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: _ /* comment */:: Nil) =>
- StructField(fieldName, nodeToDataType(dataType), nullable = true)
+ StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true)
+ case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: comment :: Nil) =>
+ val meta = new MetadataBuilder().putString("comment", unquoteString(comment.text)).build()
+ StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true, meta)
case _ =>
noParseRule("StructField", node)
}
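The second TOK_TABCOL case now preserves a column comment in the field metadata instead of dropping it. A small sketch of the resulting StructField (standard Spark types API; the comment text is illustrative):

    import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField}

    val meta = new MetadataBuilder().putString("comment", "user id").build()
    val field = StructField("id", StringType, nullable = true, meta)
    assert(field.metadata.getString("comment") == "user id")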
@@ -222,15 +224,16 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
case Nil =>
ShowFunctions(None, None)
case Token(name, Nil) :: Nil =>
- ShowFunctions(None, Some(unquoteString(name)))
+ ShowFunctions(None, Some(unquoteString(cleanIdentifier(name))))
case Token(db, Nil) :: Token(name, Nil) :: Nil =>
- ShowFunctions(Some(unquoteString(db)), Some(unquoteString(name)))
+ ShowFunctions(Some(unquoteString(cleanIdentifier(db))),
+ Some(unquoteString(cleanIdentifier(name))))
case _ =>
noParseRule("SHOW FUNCTIONS", node)
}
case Token("TOK_DESCFUNCTION", Token(functionName, Nil) :: isExtended) =>
- DescribeFunction(functionName, isExtended.nonEmpty)
+ DescribeFunction(cleanIdentifier(functionName), isExtended.nonEmpty)
case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | "TOK_INSERT", _) :: _) =>
val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
@@ -611,7 +614,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
noParseRule("Select", node)
}
- protected val escapedIdentifier = "`([^`]+)`".r
+ protected val escapedIdentifier = "`(.+)`".r
protected val doubleQuotedString = "\"([^\"]+)\"".r
protected val singleQuotedString = "'([^']+)'".r
@@ -655,7 +658,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
nodeToExpr(qualifier) match {
case UnresolvedAttribute(nameParts) =>
UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
- case other => UnresolvedExtractValue(other, Literal(attr))
+ case other => UnresolvedExtractValue(other, Literal(cleanIdentifier(attr)))
}
/* Stars (*) */
@@ -663,7 +666,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
// The format of dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
// have a single child, which is tableName.
case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
- UnresolvedStar(Some(target.map(_.text)))
+ UnresolvedStar(Some(target.map(x => cleanIdentifier(x.text))))
/* Aggregate Functions */
case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
@@ -971,7 +974,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = {
val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node
- val alias = getClause("TOK_TABALIAS", clauses).children.head.text
+ val alias = cleanIdentifier(getClause("TOK_TABALIAS", clauses).children.head.text)
val generator = clauses.head match {
case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) =>