author     Herman van Hovell <hvanhovell@questtec.nl>    2016-01-15 15:19:10 -0800
committer  Reynold Xin <rxin@databricks.com>             2016-01-15 15:19:10 -0800
commit     7cd7f2202547224593517b392f56e49e4c94cabc (patch)
tree       3deb6f260ce94c59d2e25bc29095582dfd637173 /sql/catalyst
parent     3f1c58d60b85625ab3abf16456ce27c820453ecf (diff)
[SPARK-12575][SQL] Grammar parity with existing SQL parser
In this PR the new CatalystQl parser stack reaches grammar parity with the old parser-combinator based SQL parser. This PR also replaces all uses of the old parser and removes it from the code base.

Although the existing Hive and SQL parser dialects were mostly the same, a few kinks had to be worked out:

- The SQL parser allowed syntax like ```APPROXIMATE(0.01) COUNT(DISTINCT a)```. Making this work would require either hardcoding approximate operators in the parser or creating a dedicated approximate expression. ```APPROXIMATE_COUNT_DISTINCT(a, 0.01)``` does the same job and is much easier to maintain, so this PR **removes** the keyword.
- The old SQL parser supported ```LIMIT``` clauses in nested queries. This is **not supported** anymore; see https://github.com/apache/spark/pull/10689 for the rationale.
- Hive supports a charset-name/charset-literal combination: for instance, the expression ```_ISO-8859-1 0x4341464562616265``` yields the string ```CAFEbabe```. Hive only allows charset names that start with an underscore, which is quite annoying in Spark because as soon as you use a tuple, names start with an underscore. This PR **removes** the feature from the parser; it would be easy to implement it as an Expression later on.
- Hive and the SQL parser treat decimal literals differently. Hive turns any decimal into a ```Double```, whereas the SQL parser converted a non-scientific decimal into a ```BigDecimal``` and a scientific decimal into a ```Double```. We follow Hive's behavior here. The new parser also supports a big decimal literal, for instance ```81923801.42BD```, which can be used when a big decimal is needed.

cc rxin viirya marmbrus yhuai cloud-fan

Author: Herman van Hovell <hvanhovell@questtec.nl>

Closes #10745 from hvanhovell/SPARK-12575-2.
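As a rough illustration of the behavioral differences described above, here is a hedged sketch using the `parseExpression` entry point exposed by the reworked `ParserDialect` trait; the REPL-style results in the comments are expectations, not output captured from this patch:

```scala
import org.apache.spark.sql.catalyst.CatalystQl

val parser = new CatalystQl()

// Non-scientific decimals now follow Hive and become doubles.
parser.parseExpression("3.14")           // Literal of 3.14 (DoubleType)

// Scientific notation is still a double.
parser.parseExpression("9.0e1")          // Literal of 90.0 (DoubleType)

// The new big decimal literal keeps full precision when it is needed.
parser.parseExpression("81923801.42BD")  // Literal of Decimal(81923801.42)

// APPROXIMATE(0.01) COUNT(DISTINCT a) is gone; the function form replaces it.
parser.parseExpression("APPROXIMATE_COUNT_DISTINCT(a, 0.01)")
```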
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g | 57
-rw-r--r--  sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/FromClauseParser.g | 7
-rw-r--r--  sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g | 32
-rw-r--r--  sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g | 10
-rw-r--r--  sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java | 31
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala | 4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala | 139
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala | 46
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala | 509
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala | 1
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala | 1
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala | 150
12 files changed, 132 insertions, 855 deletions
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g
index aabb5d4958..047a7e56cb 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g
@@ -123,7 +123,6 @@ constant
| SmallintLiteral
| TinyintLiteral
| DecimalLiteral
- | charSetStringLiteral
| booleanValue
;
@@ -132,13 +131,6 @@ stringLiteralSequence
StringLiteral StringLiteral+ -> ^(TOK_STRINGLITERALSEQUENCE StringLiteral StringLiteral+)
;
-charSetStringLiteral
-@init { gParent.pushMsg("character string literal", state); }
-@after { gParent.popMsg(state); }
- :
- csName=CharSetName csLiteral=CharSetLiteral -> ^(TOK_CHARSETLITERAL $csName $csLiteral)
- ;
-
dateLiteral
:
KW_DATE StringLiteral ->
@@ -163,22 +155,38 @@ timestampLiteral
intervalLiteral
:
- KW_INTERVAL StringLiteral qualifiers=intervalQualifiers ->
- {
- adaptor.create($qualifiers.tree.token.getType(), $StringLiteral.text)
+ (KW_INTERVAL intervalConstant KW_YEAR KW_TO KW_MONTH) => KW_INTERVAL intervalConstant KW_YEAR KW_TO KW_MONTH
+ -> ^(TOK_INTERVAL_YEAR_MONTH_LITERAL intervalConstant)
+ | (KW_INTERVAL intervalConstant KW_DAY KW_TO KW_SECOND) => KW_INTERVAL intervalConstant KW_DAY KW_TO KW_SECOND
+ -> ^(TOK_INTERVAL_DAY_TIME_LITERAL intervalConstant)
+ | KW_INTERVAL
+ ((intervalConstant KW_YEAR)=> year=intervalConstant KW_YEAR)?
+ ((intervalConstant KW_MONTH)=> month=intervalConstant KW_MONTH)?
+ ((intervalConstant KW_WEEK)=> week=intervalConstant KW_WEEK)?
+ ((intervalConstant KW_DAY)=> day=intervalConstant KW_DAY)?
+ ((intervalConstant KW_HOUR)=> hour=intervalConstant KW_HOUR)?
+ ((intervalConstant KW_MINUTE)=> minute=intervalConstant KW_MINUTE)?
+ ((intervalConstant KW_SECOND)=> second=intervalConstant KW_SECOND)?
+ (millisecond=intervalConstant KW_MILLISECOND)?
+ (microsecond=intervalConstant KW_MICROSECOND)?
+ -> ^(TOK_INTERVAL
+ ^(TOK_INTERVAL_YEAR_LITERAL $year?)
+ ^(TOK_INTERVAL_MONTH_LITERAL $month?)
+ ^(TOK_INTERVAL_WEEK_LITERAL $week?)
+ ^(TOK_INTERVAL_DAY_LITERAL $day?)
+ ^(TOK_INTERVAL_HOUR_LITERAL $hour?)
+ ^(TOK_INTERVAL_MINUTE_LITERAL $minute?)
+ ^(TOK_INTERVAL_SECOND_LITERAL $second?)
+ ^(TOK_INTERVAL_MILLISECOND_LITERAL $millisecond?)
+ ^(TOK_INTERVAL_MICROSECOND_LITERAL $microsecond?))
+ ;
+
+intervalConstant
+ :
+ sign=(MINUS|PLUS)? value=Number -> {
+ adaptor.create(Number, ($sign != null ? $sign.getText() : "") + $value.getText())
}
- ;
-
-intervalQualifiers
- :
- KW_YEAR KW_TO KW_MONTH -> TOK_INTERVAL_YEAR_MONTH_LITERAL
- | KW_DAY KW_TO KW_SECOND -> TOK_INTERVAL_DAY_TIME_LITERAL
- | KW_YEAR -> TOK_INTERVAL_YEAR_LITERAL
- | KW_MONTH -> TOK_INTERVAL_MONTH_LITERAL
- | KW_DAY -> TOK_INTERVAL_DAY_LITERAL
- | KW_HOUR -> TOK_INTERVAL_HOUR_LITERAL
- | KW_MINUTE -> TOK_INTERVAL_MINUTE_LITERAL
- | KW_SECOND -> TOK_INTERVAL_SECOND_LITERAL
+ | StringLiteral
;
expression
@@ -219,7 +227,8 @@ nullCondition
precedenceUnaryPrefixExpression
:
- (precedenceUnaryOperator^)* precedenceFieldExpression
+ (precedenceUnaryOperator+)=> precedenceUnaryOperator^ precedenceUnaryPrefixExpression
+ | precedenceFieldExpression
;
precedenceUnarySuffixExpression
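To make the rewritten `intervalLiteral` rule concrete (together with the pluralized unit keywords added to the lexer further down), here is a hedged sketch of interval expressions the new grammar is meant to accept; the parser instance is an assumption used only for illustration:

```scala
// Sketch only; assumes the CatalystQl parser defined later in this patch.
val parser = new org.apache.spark.sql.catalyst.CatalystQl()

// Year-to-month and day-to-second forms still take a quoted constant.
parser.parseExpression("INTERVAL '3-1' YEAR TO MONTH")
parser.parseExpression("INTERVAL '2 10:20:30' DAY TO SECOND")

// The unit-by-unit form now takes signed numeric constants and plural units,
// including the new WEEK, MILLISECOND and MICROSECOND units.
parser.parseExpression("INTERVAL 2 YEARS 3 MONTHS")
parser.parseExpression("INTERVAL -7 DAYS 10 HOURS")
parser.parseExpression("INTERVAL 5 WEEKS 100 MILLISECONDS")
```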
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/FromClauseParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/FromClauseParser.g
index 972c52e3ff..6d76afcd4a 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/FromClauseParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/FromClauseParser.g
@@ -206,11 +206,8 @@ tableName
@init { gParent.pushMsg("table name", state); }
@after { gParent.popMsg(state); }
:
- db=identifier DOT tab=identifier
- -> ^(TOK_TABNAME $db $tab)
- |
- tab=identifier
- -> ^(TOK_TABNAME $tab)
+ id1=identifier (DOT id2=identifier)?
+ -> ^(TOK_TABNAME $id1 $id2?)
;
viewName
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
index 44a63fbef2..ee2882e51c 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
@@ -307,12 +307,12 @@ KW_AUTHORIZATION: 'AUTHORIZATION';
KW_CONF: 'CONF';
KW_VALUES: 'VALUES';
KW_RELOAD: 'RELOAD';
-KW_YEAR: 'YEAR';
-KW_MONTH: 'MONTH';
-KW_DAY: 'DAY';
-KW_HOUR: 'HOUR';
-KW_MINUTE: 'MINUTE';
-KW_SECOND: 'SECOND';
+KW_YEAR: 'YEAR'|'YEARS';
+KW_MONTH: 'MONTH'|'MONTHS';
+KW_DAY: 'DAY'|'DAYS';
+KW_HOUR: 'HOUR'|'HOURS';
+KW_MINUTE: 'MINUTE'|'MINUTES';
+KW_SECOND: 'SECOND'|'SECONDS';
KW_START: 'START';
KW_TRANSACTION: 'TRANSACTION';
KW_COMMIT: 'COMMIT';
@@ -324,6 +324,9 @@ KW_ISOLATION: 'ISOLATION';
KW_LEVEL: 'LEVEL';
KW_SNAPSHOT: 'SNAPSHOT';
KW_AUTOCOMMIT: 'AUTOCOMMIT';
+KW_WEEK: 'WEEK'|'WEEKS';
+KW_MILLISECOND: 'MILLISECOND'|'MILLISECONDS';
+KW_MICROSECOND: 'MICROSECOND'|'MICROSECONDS';
// Operators
// NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
@@ -400,12 +403,6 @@ StringLiteral
)+
;
-CharSetLiteral
- :
- StringLiteral
- | '0' 'X' (HexDigit|Digit)+
- ;
-
BigintLiteral
:
(Digit)+ 'L'
@@ -433,7 +430,7 @@ ByteLengthLiteral
Number
:
- (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
+ ((Digit+ (DOT Digit*)?) | (DOT Digit+)) Exponent?
;
/*
@@ -456,10 +453,10 @@ An Identifier can be:
- macro name
- hint name
- window name
-*/
+*/
Identifier
:
- (Letter | Digit) (Letter | Digit | '_')*
+ (Letter | Digit | '_')+
| {allowQuotedId()}? QuotedIdentifier /* though at the language level we allow all Identifiers to be QuotedIdentifiers;
at the API level only columns are allowed to be of this form */
| '`' RegexComponent+ '`'
@@ -471,11 +468,6 @@ QuotedIdentifier
'`' ( '``' | ~('`') )* '`' { setText(getText().substring(1, getText().length() -1 ).replaceAll("``", "`")); }
;
-CharSetName
- :
- '_' (Letter | Digit | '_' | '-' | '.' | ':' )+
- ;
-
WS : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}
;
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
index 2c13d3056f..c146ca5914 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
@@ -116,16 +116,20 @@ TOK_DATELITERAL;
TOK_DATETIME;
TOK_TIMESTAMP;
TOK_TIMESTAMPLITERAL;
+TOK_INTERVAL;
TOK_INTERVAL_YEAR_MONTH;
TOK_INTERVAL_YEAR_MONTH_LITERAL;
TOK_INTERVAL_DAY_TIME;
TOK_INTERVAL_DAY_TIME_LITERAL;
TOK_INTERVAL_YEAR_LITERAL;
TOK_INTERVAL_MONTH_LITERAL;
+TOK_INTERVAL_WEEK_LITERAL;
TOK_INTERVAL_DAY_LITERAL;
TOK_INTERVAL_HOUR_LITERAL;
TOK_INTERVAL_MINUTE_LITERAL;
TOK_INTERVAL_SECOND_LITERAL;
+TOK_INTERVAL_MILLISECOND_LITERAL;
+TOK_INTERVAL_MICROSECOND_LITERAL;
TOK_STRING;
TOK_CHAR;
TOK_VARCHAR;
@@ -228,7 +232,6 @@ TOK_TMP_FILE;
TOK_TABSORTCOLNAMEASC;
TOK_TABSORTCOLNAMEDESC;
TOK_STRINGLITERALSEQUENCE;
-TOK_CHARSETLITERAL;
TOK_CREATEFUNCTION;
TOK_DROPFUNCTION;
TOK_RELOADFUNCTION;
@@ -509,7 +512,9 @@ import java.util.HashMap;
xlateMap.put("KW_UPDATE", "UPDATE");
xlateMap.put("KW_VALUES", "VALUES");
xlateMap.put("KW_PURGE", "PURGE");
-
+ xlateMap.put("KW_WEEK", "WEEK");
+ xlateMap.put("KW_MILLISECOND", "MILLISECOND");
+ xlateMap.put("KW_MICROSECOND", "MICROSECOND");
// Operators
xlateMap.put("DOT", ".");
@@ -2078,6 +2083,7 @@ primitiveType
| KW_SMALLINT -> TOK_SMALLINT
| KW_INT -> TOK_INT
| KW_BIGINT -> TOK_BIGINT
+ | KW_LONG -> TOK_BIGINT
| KW_BOOLEAN -> TOK_BOOLEAN
| KW_FLOAT -> TOK_FLOAT
| KW_DOUBLE -> TOK_DOUBLE
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java
index 5bc87b680f..2520c7bb8d 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java
@@ -18,12 +18,10 @@
package org.apache.spark.sql.catalyst.parser;
-import java.io.UnsupportedEncodingException;
-
/**
* A couple of utility methods that help with parsing ASTs.
*
- * Both methods in this class were take from the SemanticAnalyzer in Hive:
+ * The 'unescapeSQLString' method in this class was take from the SemanticAnalyzer in Hive:
* ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
*/
public final class ParseUtils {
@@ -31,33 +29,6 @@ public final class ParseUtils {
super();
}
- public static String charSetString(String charSetName, String charSetString)
- throws UnsupportedEncodingException {
- // The character set name starts with a _, so strip that
- charSetName = charSetName.substring(1);
- if (charSetString.charAt(0) == '\'') {
- return new String(unescapeSQLString(charSetString).getBytes(), charSetName);
- } else // hex input is also supported
- {
- assert charSetString.charAt(0) == '0';
- assert charSetString.charAt(1) == 'x';
- charSetString = charSetString.substring(2);
-
- byte[] bArray = new byte[charSetString.length() / 2];
- int j = 0;
- for (int i = 0; i < charSetString.length(); i += 2) {
- int val = Character.digit(charSetString.charAt(i), 16) * 16
- + Character.digit(charSetString.charAt(i + 1), 16);
- if (val > 127) {
- val = val - 256;
- }
- bArray[j++] = (byte)val;
- }
-
- return new String(bArray, charSetName);
- }
- }
-
private static final int[] multiplier = new int[] {1000, 100, 10, 1};
@SuppressWarnings("nls")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
index bdc52c08ac..9443369808 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
@@ -26,9 +26,9 @@ import scala.util.parsing.input.CharArrayReader.EofCh
import org.apache.spark.sql.catalyst.plans.logical._
private[sql] abstract class AbstractSparkSQLParser
- extends StandardTokenParsers with PackratParsers {
+ extends StandardTokenParsers with PackratParsers with ParserDialect {
- def parse(input: String): LogicalPlan = synchronized {
+ def parsePlan(input: String): LogicalPlan = synchronized {
// Initialize the Keywords.
initLexical
phrase(start)(new lexical.Scanner(input)) match {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
index d0fbdacf6e..c1591ecfe2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
@@ -30,16 +30,10 @@ import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.random.RandomSampler
-private[sql] object CatalystQl {
- val parser = new CatalystQl
- def parseExpression(sql: String): Expression = parser.parseExpression(sql)
- def parseTableIdentifier(sql: String): TableIdentifier = parser.parseTableIdentifier(sql)
-}
-
/**
* This class translates a HQL String to a Catalyst [[LogicalPlan]] or [[Expression]].
*/
-private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) {
+private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends ParserDialect {
object Token {
def unapply(node: ASTNode): Some[(String, List[ASTNode])] = {
CurrentOrigin.setPosition(node.line, node.positionInLine)
@@ -611,13 +605,6 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
case plainIdent => plainIdent
}
- val numericAstTypes = Seq(
- SparkSqlParser.Number,
- SparkSqlParser.TinyintLiteral,
- SparkSqlParser.SmallintLiteral,
- SparkSqlParser.BigintLiteral,
- SparkSqlParser.DecimalLiteral)
-
/* Case insensitive matches */
val COUNT = "(?i)COUNT".r
val SUM = "(?i)SUM".r
@@ -635,6 +622,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
val WHEN = "(?i)WHEN".r
val CASE = "(?i)CASE".r
+ val INTEGRAL = "[+-]?\\d+".r
+
protected def nodeToExpr(node: ASTNode): Expression = node match {
/* Attribute References */
case Token("TOK_TABLE_OR_COL", Token(name, Nil) :: Nil) =>
@@ -650,8 +639,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
// The format of dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
// has a single child which is tableName.
- case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", Token(name, Nil) :: Nil) :: Nil) =>
- UnresolvedStar(Some(UnresolvedAttribute.parseAttributeName(name)))
+ case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
+ UnresolvedStar(Some(target.map(_.text)))
/* Aggregate Functions */
case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
@@ -787,71 +776,71 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
Literal(strings.map(s => ParseUtils.unescapeSQLString(s.text)).mkString)
- // This code is adapted from
- // /ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java#L223
- case ast: ASTNode if numericAstTypes contains ast.tokenType =>
- var v: Literal = null
- try {
- if (ast.text.endsWith("L")) {
- // Literal bigint.
- v = Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
- } else if (ast.text.endsWith("S")) {
- // Literal smallint.
- v = Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, ShortType)
- } else if (ast.text.endsWith("Y")) {
- // Literal tinyint.
- v = Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, ByteType)
- } else if (ast.text.endsWith("BD") || ast.text.endsWith("D")) {
- // Literal decimal
- val strVal = ast.text.stripSuffix("D").stripSuffix("B")
- v = Literal(Decimal(strVal))
- } else {
- v = Literal.create(ast.text.toDouble, DoubleType)
- v = Literal.create(ast.text.toLong, LongType)
- v = Literal.create(ast.text.toInt, IntegerType)
- }
- } catch {
- case nfe: NumberFormatException => // Do nothing
- }
-
- if (v == null) {
- sys.error(s"Failed to parse number '${ast.text}'.")
- } else {
- v
- }
-
- case ast: ASTNode if ast.tokenType == SparkSqlParser.StringLiteral =>
- Literal(ParseUtils.unescapeSQLString(ast.text))
+ case ast if ast.tokenType == SparkSqlParser.TinyintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toByte, ByteType)
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
- Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
+ case ast if ast.tokenType == SparkSqlParser.SmallintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toShort, ShortType)
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_CHARSETLITERAL =>
- Literal(ParseUtils.charSetString(ast.children.head.text, ast.children(1).text))
+ case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
+ Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
- Literal(CalendarInterval.fromYearMonthString(ast.text))
+ case ast if ast.tokenType == SparkSqlParser.DecimalLiteral =>
+ Literal(Decimal(ast.text.substring(0, ast.text.length() - 2)))
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
- Literal(CalendarInterval.fromDayTimeString(ast.text))
-
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL =>
- Literal(CalendarInterval.fromSingleUnitString("year", ast.text))
-
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL =>
- Literal(CalendarInterval.fromSingleUnitString("month", ast.text))
-
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_LITERAL =>
- Literal(CalendarInterval.fromSingleUnitString("day", ast.text))
-
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL =>
- Literal(CalendarInterval.fromSingleUnitString("hour", ast.text))
+ case ast if ast.tokenType == SparkSqlParser.Number =>
+ val text = ast.text
+ text match {
+ case INTEGRAL() =>
+ BigDecimal(text) match {
+ case v if v.isValidInt =>
+ Literal(v.intValue())
+ case v if v.isValidLong =>
+ Literal(v.longValue())
+ case v => Literal(v.underlying())
+ }
+ case _ =>
+ Literal(text.toDouble)
+ }
+ case ast if ast.tokenType == SparkSqlParser.StringLiteral =>
+ Literal(ParseUtils.unescapeSQLString(ast.text))
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL =>
- Literal(CalendarInterval.fromSingleUnitString("minute", ast.text))
+ case ast if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
+ Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
- case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL =>
- Literal(CalendarInterval.fromSingleUnitString("second", ast.text))
+ case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
+ Literal(CalendarInterval.fromYearMonthString(ast.children.head.text))
+
+ case ast if ast.tokenType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
+ Literal(CalendarInterval.fromDayTimeString(ast.children.head.text))
+
+ case Token("TOK_INTERVAL", elements) =>
+ var interval = new CalendarInterval(0, 0)
+ var updated = false
+ elements.foreach {
+ // The interval node will always contain children for all possible time units. A child node
+ // is only useful when it contains exactly one (numeric) child.
+ case e @ Token(name, Token(value, Nil) :: Nil) =>
+ val unit = name match {
+ case "TOK_INTERVAL_YEAR_LITERAL" => "year"
+ case "TOK_INTERVAL_MONTH_LITERAL" => "month"
+ case "TOK_INTERVAL_WEEK_LITERAL" => "week"
+ case "TOK_INTERVAL_DAY_LITERAL" => "day"
+ case "TOK_INTERVAL_HOUR_LITERAL" => "hour"
+ case "TOK_INTERVAL_MINUTE_LITERAL" => "minute"
+ case "TOK_INTERVAL_SECOND_LITERAL" => "second"
+ case "TOK_INTERVAL_MILLISECOND_LITERAL" => "millisecond"
+ case "TOK_INTERVAL_MICROSECOND_LITERAL" => "microsecond"
+ case _ => noParseRule(s"Interval($name)", e)
+ }
+ interval = interval.add(CalendarInterval.fromSingleUnitString(unit, value))
+ updated = true
+ case _ =>
+ }
+ if (!updated) {
+ throw new AnalysisException("at least one time unit should be given for interval literal")
+ }
+ Literal(interval)
case _ =>
noParseRule("Expression", node)
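The `Number` case above narrows integral text to the smallest fitting type and treats everything else as a double, matching Hive. A standalone restatement of that narrowing, with illustrative names that are not part of the patch:

```scala
// Illustrative restatement of the literal narrowing done in nodeToExpr.
val INTEGRAL = "[+-]?\\d+".r

def numericLiteralValue(text: String): Any = text match {
  case INTEGRAL() =>
    val v = BigDecimal(text)
    if (v.isValidInt) v.intValue()          // fits in an Int
    else if (v.isValidLong) v.longValue()   // fits in a Long
    else v.underlying()                     // falls back to java.math.BigDecimal
  case _ =>
    text.toDouble                           // non-integral numbers follow Hive: Double
}

assert(numericLiteralValue("42") == 42)
assert(numericLiteralValue("3000000000") == 3000000000L)
assert(numericLiteralValue("9.0e1") == 90.0)
```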
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala
index e21d3c0546..7d9fbf2f12 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala
@@ -18,52 +18,22 @@
package org.apache.spark.sql.catalyst
import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
/**
* Root class of SQL Parser Dialect, and we don't guarantee the binary
* compatibility for the future release, let's keep it as the internal
* interface for advanced user.
- *
*/
@DeveloperApi
-abstract class ParserDialect {
- // this is the main function that will be implemented by sql parser.
- def parse(sqlText: String): LogicalPlan
-}
+trait ParserDialect {
+ /** Creates LogicalPlan for a given SQL string. */
+ def parsePlan(sqlText: String): LogicalPlan
-/**
- * Currently we support the default dialect named "sql", associated with the class
- * [[DefaultParserDialect]]
- *
- * And we can also provide custom SQL Dialect, for example in Spark SQL CLI:
- * {{{
- *-- switch to "hiveql" dialect
- * spark-sql>SET spark.sql.dialect=hiveql;
- * spark-sql>SELECT * FROM src LIMIT 1;
- *
- *-- switch to "sql" dialect
- * spark-sql>SET spark.sql.dialect=sql;
- * spark-sql>SELECT * FROM src LIMIT 1;
- *
- *-- register the new SQL dialect
- * spark-sql> SET spark.sql.dialect=com.xxx.xxx.SQL99Dialect;
- * spark-sql> SELECT * FROM src LIMIT 1;
- *
- *-- register the non-exist SQL dialect
- * spark-sql> SET spark.sql.dialect=NotExistedClass;
- * spark-sql> SELECT * FROM src LIMIT 1;
- *
- *-- Exception will be thrown and switch to dialect
- *-- "sql" (for SQLContext) or
- *-- "hiveql" (for HiveContext)
- * }}}
- */
-private[spark] class DefaultParserDialect extends ParserDialect {
- @transient
- protected val sqlParser = SqlParser
+ /** Creates Expression for a given SQL string. */
+ def parseExpression(sqlText: String): Expression
- override def parse(sqlText: String): LogicalPlan = {
- sqlParser.parse(sqlText)
- }
+ /** Creates TableIdentifier for a given SQL string. */
+ def parseTableIdentifier(sqlText: String): TableIdentifier
}
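With `ParserDialect` reduced to a trait with three entry points, a custom dialect is just an implementation of those methods. A minimal hypothetical example that delegates to `CatalystQl` (the delegation itself is an assumption, not something this patch adds):

```scala
import org.apache.spark.sql.catalyst.{CatalystQl, ParserDialect, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Hypothetical custom dialect that simply forwards to CatalystQl.
class MyDialect extends ParserDialect {
  private val delegate = new CatalystQl()

  override def parsePlan(sqlText: String): LogicalPlan = delegate.parsePlan(sqlText)
  override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText)
  override def parseTableIdentifier(sqlText: String): TableIdentifier =
    delegate.parseTableIdentifier(sqlText)
}
```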
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
deleted file mode 100644
index 85ff4ea0c9..0000000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import scala.language.implicitConversions
-
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate._
-import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util.DataTypeParser
-import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
-
-/**
- * A very simple SQL parser. Based loosely on:
- * https://github.com/stephentu/scala-sql-parser/blob/master/src/main/scala/parser.scala
- *
- * Limitations:
- * - Only supports a very limited subset of SQL.
- *
- * This is currently included mostly for illustrative purposes. Users wanting more complete support
- * for a SQL like language should checkout the HiveQL support in the sql/hive sub-project.
- */
-object SqlParser extends AbstractSparkSQLParser with DataTypeParser {
-
- def parseExpression(input: String): Expression = synchronized {
- // Initialize the Keywords.
- initLexical
- phrase(projection)(new lexical.Scanner(input)) match {
- case Success(plan, _) => plan
- case failureOrError => sys.error(failureOrError.toString)
- }
- }
-
- def parseTableIdentifier(input: String): TableIdentifier = synchronized {
- // Initialize the Keywords.
- initLexical
- phrase(tableIdentifier)(new lexical.Scanner(input)) match {
- case Success(ident, _) => ident
- case failureOrError => sys.error(failureOrError.toString)
- }
- }
-
- // Keyword is a convention with AbstractSparkSQLParser, which will scan all of the `Keyword`
- // properties via reflection the class in runtime for constructing the SqlLexical object
- protected val ALL = Keyword("ALL")
- protected val AND = Keyword("AND")
- protected val APPROXIMATE = Keyword("APPROXIMATE")
- protected val AS = Keyword("AS")
- protected val ASC = Keyword("ASC")
- protected val BETWEEN = Keyword("BETWEEN")
- protected val BY = Keyword("BY")
- protected val CASE = Keyword("CASE")
- protected val CAST = Keyword("CAST")
- protected val DESC = Keyword("DESC")
- protected val DISTINCT = Keyword("DISTINCT")
- protected val ELSE = Keyword("ELSE")
- protected val END = Keyword("END")
- protected val EXCEPT = Keyword("EXCEPT")
- protected val FALSE = Keyword("FALSE")
- protected val FROM = Keyword("FROM")
- protected val FULL = Keyword("FULL")
- protected val GROUP = Keyword("GROUP")
- protected val HAVING = Keyword("HAVING")
- protected val IN = Keyword("IN")
- protected val INNER = Keyword("INNER")
- protected val INSERT = Keyword("INSERT")
- protected val INTERSECT = Keyword("INTERSECT")
- protected val INTERVAL = Keyword("INTERVAL")
- protected val INTO = Keyword("INTO")
- protected val IS = Keyword("IS")
- protected val JOIN = Keyword("JOIN")
- protected val LEFT = Keyword("LEFT")
- protected val LIKE = Keyword("LIKE")
- protected val LIMIT = Keyword("LIMIT")
- protected val NOT = Keyword("NOT")
- protected val NULL = Keyword("NULL")
- protected val ON = Keyword("ON")
- protected val OR = Keyword("OR")
- protected val ORDER = Keyword("ORDER")
- protected val SORT = Keyword("SORT")
- protected val OUTER = Keyword("OUTER")
- protected val OVERWRITE = Keyword("OVERWRITE")
- protected val REGEXP = Keyword("REGEXP")
- protected val RIGHT = Keyword("RIGHT")
- protected val RLIKE = Keyword("RLIKE")
- protected val SELECT = Keyword("SELECT")
- protected val SEMI = Keyword("SEMI")
- protected val TABLE = Keyword("TABLE")
- protected val THEN = Keyword("THEN")
- protected val TRUE = Keyword("TRUE")
- protected val UNION = Keyword("UNION")
- protected val WHEN = Keyword("WHEN")
- protected val WHERE = Keyword("WHERE")
- protected val WITH = Keyword("WITH")
-
- protected lazy val start: Parser[LogicalPlan] =
- start1 | insert | cte
-
- protected lazy val start1: Parser[LogicalPlan] =
- (select | ("(" ~> select <~ ")")) *
- ( UNION ~ ALL ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Union(q1, q2) }
- | INTERSECT ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Intersect(q1, q2) }
- | EXCEPT ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Except(q1, q2)}
- | UNION ~ DISTINCT.? ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Distinct(Union(q1, q2)) }
- )
-
- protected lazy val select: Parser[LogicalPlan] =
- SELECT ~> DISTINCT.? ~
- repsep(projection, ",") ~
- (FROM ~> relations).? ~
- (WHERE ~> expression).? ~
- (GROUP ~ BY ~> rep1sep(expression, ",")).? ~
- (HAVING ~> expression).? ~
- sortType.? ~
- (LIMIT ~> expression).? ^^ {
- case d ~ p ~ r ~ f ~ g ~ h ~ o ~ l =>
- val base = r.getOrElse(OneRowRelation)
- val withFilter = f.map(Filter(_, base)).getOrElse(base)
- val withProjection = g
- .map(Aggregate(_, p.map(UnresolvedAlias(_)), withFilter))
- .getOrElse(Project(p.map(UnresolvedAlias(_)), withFilter))
- val withDistinct = d.map(_ => Distinct(withProjection)).getOrElse(withProjection)
- val withHaving = h.map(Filter(_, withDistinct)).getOrElse(withDistinct)
- val withOrder = o.map(_(withHaving)).getOrElse(withHaving)
- val withLimit = l.map(Limit(_, withOrder)).getOrElse(withOrder)
- withLimit
- }
-
- protected lazy val insert: Parser[LogicalPlan] =
- INSERT ~> (OVERWRITE ^^^ true | INTO ^^^ false) ~ (TABLE ~> relation) ~ select ^^ {
- case o ~ r ~ s => InsertIntoTable(r, Map.empty[String, Option[String]], s, o, false)
- }
-
- protected lazy val cte: Parser[LogicalPlan] =
- WITH ~> rep1sep(ident ~ ( AS ~ "(" ~> start1 <~ ")"), ",") ~ (start1 | insert) ^^ {
- case r ~ s => With(s, r.map({case n ~ s => (n, Subquery(n, s))}).toMap)
- }
-
- protected lazy val projection: Parser[Expression] =
- expression ~ (AS.? ~> ident.?) ^^ {
- case e ~ a => a.fold(e)(Alias(e, _)())
- }
-
- // Based very loosely on the MySQL Grammar.
- // http://dev.mysql.com/doc/refman/5.0/en/join.html
- protected lazy val relations: Parser[LogicalPlan] =
- ( relation ~ rep1("," ~> relation) ^^ {
- case r1 ~ joins => joins.foldLeft(r1) { case(lhs, r) => Join(lhs, r, Inner, None) } }
- | relation
- )
-
- protected lazy val relation: Parser[LogicalPlan] =
- joinedRelation | relationFactor
-
- protected lazy val relationFactor: Parser[LogicalPlan] =
- ( tableIdentifier ~ (opt(AS) ~> opt(ident)) ^^ {
- case tableIdent ~ alias => UnresolvedRelation(tableIdent, alias)
- }
- | ("(" ~> start <~ ")") ~ (AS.? ~> ident) ^^ { case s ~ a => Subquery(a, s) }
- )
-
- protected lazy val joinedRelation: Parser[LogicalPlan] =
- relationFactor ~ rep1(joinType.? ~ (JOIN ~> relationFactor) ~ joinConditions.?) ^^ {
- case r1 ~ joins =>
- joins.foldLeft(r1) { case (lhs, jt ~ rhs ~ cond) =>
- Join(lhs, rhs, joinType = jt.getOrElse(Inner), cond)
- }
- }
-
- protected lazy val joinConditions: Parser[Expression] =
- ON ~> expression
-
- protected lazy val joinType: Parser[JoinType] =
- ( INNER ^^^ Inner
- | LEFT ~ SEMI ^^^ LeftSemi
- | LEFT ~ OUTER.? ^^^ LeftOuter
- | RIGHT ~ OUTER.? ^^^ RightOuter
- | FULL ~ OUTER.? ^^^ FullOuter
- )
-
- protected lazy val sortType: Parser[LogicalPlan => LogicalPlan] =
- ( ORDER ~ BY ~> ordering ^^ { case o => l: LogicalPlan => Sort(o, true, l) }
- | SORT ~ BY ~> ordering ^^ { case o => l: LogicalPlan => Sort(o, false, l) }
- )
-
- protected lazy val ordering: Parser[Seq[SortOrder]] =
- ( rep1sep(expression ~ direction.?, ",") ^^ {
- case exps => exps.map(pair => SortOrder(pair._1, pair._2.getOrElse(Ascending)))
- }
- )
-
- protected lazy val direction: Parser[SortDirection] =
- ( ASC ^^^ Ascending
- | DESC ^^^ Descending
- )
-
- protected lazy val expression: Parser[Expression] =
- orExpression
-
- protected lazy val orExpression: Parser[Expression] =
- andExpression * (OR ^^^ { (e1: Expression, e2: Expression) => Or(e1, e2) })
-
- protected lazy val andExpression: Parser[Expression] =
- notExpression * (AND ^^^ { (e1: Expression, e2: Expression) => And(e1, e2) })
-
- protected lazy val notExpression: Parser[Expression] =
- NOT.? ~ comparisonExpression ^^ { case maybeNot ~ e => maybeNot.map(_ => Not(e)).getOrElse(e) }
-
- protected lazy val comparisonExpression: Parser[Expression] =
- ( termExpression ~ ("=" ~> termExpression) ^^ { case e1 ~ e2 => EqualTo(e1, e2) }
- | termExpression ~ ("<" ~> termExpression) ^^ { case e1 ~ e2 => LessThan(e1, e2) }
- | termExpression ~ ("<=" ~> termExpression) ^^ { case e1 ~ e2 => LessThanOrEqual(e1, e2) }
- | termExpression ~ (">" ~> termExpression) ^^ { case e1 ~ e2 => GreaterThan(e1, e2) }
- | termExpression ~ (">=" ~> termExpression) ^^ { case e1 ~ e2 => GreaterThanOrEqual(e1, e2) }
- | termExpression ~ ("!=" ~> termExpression) ^^ { case e1 ~ e2 => Not(EqualTo(e1, e2)) }
- | termExpression ~ ("<>" ~> termExpression) ^^ { case e1 ~ e2 => Not(EqualTo(e1, e2)) }
- | termExpression ~ ("<=>" ~> termExpression) ^^ { case e1 ~ e2 => EqualNullSafe(e1, e2) }
- | termExpression ~ NOT.? ~ (BETWEEN ~> termExpression) ~ (AND ~> termExpression) ^^ {
- case e ~ not ~ el ~ eu =>
- val betweenExpr: Expression = And(GreaterThanOrEqual(e, el), LessThanOrEqual(e, eu))
- not.fold(betweenExpr)(f => Not(betweenExpr))
- }
- | termExpression ~ (RLIKE ~> termExpression) ^^ { case e1 ~ e2 => RLike(e1, e2) }
- | termExpression ~ (REGEXP ~> termExpression) ^^ { case e1 ~ e2 => RLike(e1, e2) }
- | termExpression ~ (LIKE ~> termExpression) ^^ { case e1 ~ e2 => Like(e1, e2) }
- | termExpression ~ (NOT ~ LIKE ~> termExpression) ^^ { case e1 ~ e2 => Not(Like(e1, e2)) }
- | termExpression ~ (IN ~ "(" ~> rep1sep(termExpression, ",")) <~ ")" ^^ {
- case e1 ~ e2 => In(e1, e2)
- }
- | termExpression ~ (NOT ~ IN ~ "(" ~> rep1sep(termExpression, ",")) <~ ")" ^^ {
- case e1 ~ e2 => Not(In(e1, e2))
- }
- | termExpression <~ IS ~ NULL ^^ { case e => IsNull(e) }
- | termExpression <~ IS ~ NOT ~ NULL ^^ { case e => IsNotNull(e) }
- | termExpression
- )
-
- protected lazy val termExpression: Parser[Expression] =
- productExpression *
- ( "+" ^^^ { (e1: Expression, e2: Expression) => Add(e1, e2) }
- | "-" ^^^ { (e1: Expression, e2: Expression) => Subtract(e1, e2) }
- )
-
- protected lazy val productExpression: Parser[Expression] =
- baseExpression *
- ( "*" ^^^ { (e1: Expression, e2: Expression) => Multiply(e1, e2) }
- | "/" ^^^ { (e1: Expression, e2: Expression) => Divide(e1, e2) }
- | "%" ^^^ { (e1: Expression, e2: Expression) => Remainder(e1, e2) }
- | "&" ^^^ { (e1: Expression, e2: Expression) => BitwiseAnd(e1, e2) }
- | "|" ^^^ { (e1: Expression, e2: Expression) => BitwiseOr(e1, e2) }
- | "^" ^^^ { (e1: Expression, e2: Expression) => BitwiseXor(e1, e2) }
- )
-
- protected lazy val function: Parser[Expression] =
- ( ident <~ ("(" ~ "*" ~ ")") ^^ { case udfName =>
- if (lexical.normalizeKeyword(udfName) == "count") {
- AggregateExpression(Count(Literal(1)), mode = Complete, isDistinct = false)
- } else {
- throw new AnalysisException(s"invalid expression $udfName(*)")
- }
- }
- | ident ~ ("(" ~> repsep(expression, ",")) <~ ")" ^^
- { case udfName ~ exprs => UnresolvedFunction(udfName, exprs, isDistinct = false) }
- | ident ~ ("(" ~ DISTINCT ~> repsep(expression, ",")) <~ ")" ^^ { case udfName ~ exprs =>
- lexical.normalizeKeyword(udfName) match {
- case "count" =>
- aggregate.Count(exprs).toAggregateExpression(isDistinct = true)
- case _ => UnresolvedFunction(udfName, exprs, isDistinct = true)
- }
- }
- | APPROXIMATE ~> ident ~ ("(" ~ DISTINCT ~> expression <~ ")") ^^ { case udfName ~ exp =>
- if (lexical.normalizeKeyword(udfName) == "count") {
- AggregateExpression(new HyperLogLogPlusPlus(exp), mode = Complete, isDistinct = false)
- } else {
- throw new AnalysisException(s"invalid function approximate $udfName")
- }
- }
- | APPROXIMATE ~> "(" ~> unsignedFloat ~ ")" ~ ident ~ "(" ~ DISTINCT ~ expression <~ ")" ^^
- { case s ~ _ ~ udfName ~ _ ~ _ ~ exp =>
- if (lexical.normalizeKeyword(udfName) == "count") {
- AggregateExpression(
- HyperLogLogPlusPlus(exp, s.toDouble, 0, 0),
- mode = Complete,
- isDistinct = false)
- } else {
- throw new AnalysisException(s"invalid function approximate($s) $udfName")
- }
- }
- | CASE ~> whenThenElse ^^
- { case branches => CaseWhen.createFromParser(branches) }
- | CASE ~> expression ~ whenThenElse ^^
- { case keyPart ~ branches => CaseKeyWhen(keyPart, branches) }
- )
-
- protected lazy val whenThenElse: Parser[List[Expression]] =
- rep1(WHEN ~> expression ~ (THEN ~> expression)) ~ (ELSE ~> expression).? <~ END ^^ {
- case altPart ~ elsePart =>
- altPart.flatMap { case whenExpr ~ thenExpr =>
- Seq(whenExpr, thenExpr)
- } ++ elsePart
- }
-
- protected lazy val cast: Parser[Expression] =
- CAST ~ "(" ~> expression ~ (AS ~> dataType) <~ ")" ^^ {
- case exp ~ t => Cast(exp, t)
- }
-
- protected lazy val literal: Parser[Literal] =
- ( numericLiteral
- | booleanLiteral
- | stringLit ^^ { case s => Literal.create(s, StringType) }
- | intervalLiteral
- | NULL ^^^ Literal.create(null, NullType)
- )
-
- protected lazy val booleanLiteral: Parser[Literal] =
- ( TRUE ^^^ Literal.create(true, BooleanType)
- | FALSE ^^^ Literal.create(false, BooleanType)
- )
-
- protected lazy val numericLiteral: Parser[Literal] =
- ( integral ^^ { case i => Literal(toNarrowestIntegerType(i)) }
- | sign.? ~ unsignedFloat ^^
- { case s ~ f => Literal(toDecimalOrDouble(s.getOrElse("") + f)) }
- )
-
- protected lazy val unsignedFloat: Parser[String] =
- ( "." ~> numericLit ^^ { u => "0." + u }
- | elem("decimal", _.isInstanceOf[lexical.DecimalLit]) ^^ (_.chars)
- )
-
- protected lazy val sign: Parser[String] = ("+" | "-")
-
- protected lazy val integral: Parser[String] =
- sign.? ~ numericLit ^^ { case s ~ n => s.getOrElse("") + n }
-
- private def intervalUnit(unitName: String) = acceptIf {
- case lexical.Identifier(str) =>
- val normalized = lexical.normalizeKeyword(str)
- normalized == unitName || normalized == unitName + "s"
- case _ => false
- } {_ => "wrong interval unit"}
-
- protected lazy val month: Parser[Int] =
- integral <~ intervalUnit("month") ^^ { case num => num.toInt }
-
- protected lazy val year: Parser[Int] =
- integral <~ intervalUnit("year") ^^ { case num => num.toInt * 12 }
-
- protected lazy val microsecond: Parser[Long] =
- integral <~ intervalUnit("microsecond") ^^ { case num => num.toLong }
-
- protected lazy val millisecond: Parser[Long] =
- integral <~ intervalUnit("millisecond") ^^ {
- case num => num.toLong * CalendarInterval.MICROS_PER_MILLI
- }
-
- protected lazy val second: Parser[Long] =
- integral <~ intervalUnit("second") ^^ {
- case num => num.toLong * CalendarInterval.MICROS_PER_SECOND
- }
-
- protected lazy val minute: Parser[Long] =
- integral <~ intervalUnit("minute") ^^ {
- case num => num.toLong * CalendarInterval.MICROS_PER_MINUTE
- }
-
- protected lazy val hour: Parser[Long] =
- integral <~ intervalUnit("hour") ^^ {
- case num => num.toLong * CalendarInterval.MICROS_PER_HOUR
- }
-
- protected lazy val day: Parser[Long] =
- integral <~ intervalUnit("day") ^^ {
- case num => num.toLong * CalendarInterval.MICROS_PER_DAY
- }
-
- protected lazy val week: Parser[Long] =
- integral <~ intervalUnit("week") ^^ {
- case num => num.toLong * CalendarInterval.MICROS_PER_WEEK
- }
-
- private def intervalKeyword(keyword: String) = acceptIf {
- case lexical.Identifier(str) =>
- lexical.normalizeKeyword(str) == keyword
- case _ => false
- } {_ => "wrong interval keyword"}
-
- protected lazy val intervalLiteral: Parser[Literal] =
- ( INTERVAL ~> stringLit <~ intervalKeyword("year") ~ intervalKeyword("to") ~
- intervalKeyword("month") ^^ { case s =>
- Literal(CalendarInterval.fromYearMonthString(s))
- }
- | INTERVAL ~> stringLit <~ intervalKeyword("day") ~ intervalKeyword("to") ~
- intervalKeyword("second") ^^ { case s =>
- Literal(CalendarInterval.fromDayTimeString(s))
- }
- | INTERVAL ~> stringLit <~ intervalKeyword("year") ^^ { case s =>
- Literal(CalendarInterval.fromSingleUnitString("year", s))
- }
- | INTERVAL ~> stringLit <~ intervalKeyword("month") ^^ { case s =>
- Literal(CalendarInterval.fromSingleUnitString("month", s))
- }
- | INTERVAL ~> stringLit <~ intervalKeyword("day") ^^ { case s =>
- Literal(CalendarInterval.fromSingleUnitString("day", s))
- }
- | INTERVAL ~> stringLit <~ intervalKeyword("hour") ^^ { case s =>
- Literal(CalendarInterval.fromSingleUnitString("hour", s))
- }
- | INTERVAL ~> stringLit <~ intervalKeyword("minute") ^^ { case s =>
- Literal(CalendarInterval.fromSingleUnitString("minute", s))
- }
- | INTERVAL ~> stringLit <~ intervalKeyword("second") ^^ { case s =>
- Literal(CalendarInterval.fromSingleUnitString("second", s))
- }
- | INTERVAL ~> year.? ~ month.? ~ week.? ~ day.? ~ hour.? ~ minute.? ~ second.? ~
- millisecond.? ~ microsecond.? ^^ { case year ~ month ~ week ~ day ~ hour ~ minute ~ second ~
- millisecond ~ microsecond =>
- if (!Seq(year, month, week, day, hour, minute, second,
- millisecond, microsecond).exists(_.isDefined)) {
- throw new AnalysisException(
- "at least one time unit should be given for interval literal")
- }
- val months = Seq(year, month).map(_.getOrElse(0)).sum
- val microseconds = Seq(week, day, hour, minute, second, millisecond, microsecond)
- .map(_.getOrElse(0L)).sum
- Literal(new CalendarInterval(months, microseconds))
- }
- )
-
- private def toNarrowestIntegerType(value: String): Any = {
- val bigIntValue = BigDecimal(value)
-
- bigIntValue match {
- case v if bigIntValue.isValidInt => v.toIntExact
- case v if bigIntValue.isValidLong => v.toLongExact
- case v => v.underlying()
- }
- }
-
- private def toDecimalOrDouble(value: String): Any = {
- val decimal = BigDecimal(value)
- // follow the behavior in MS SQL Server
- // https://msdn.microsoft.com/en-us/library/ms179899.aspx
- if (value.contains('E') || value.contains('e')) {
- decimal.doubleValue()
- } else {
- decimal.underlying()
- }
- }
-
- protected lazy val baseExpression: Parser[Expression] =
- ( "*" ^^^ UnresolvedStar(None)
- | rep1(ident <~ ".") <~ "*" ^^ { case target => UnresolvedStar(Option(target))}
- | primary
- )
-
- protected lazy val signedPrimary: Parser[Expression] =
- sign ~ primary ^^ { case s ~ e => if (s == "-") UnaryMinus(e) else e }
-
- protected lazy val attributeName: Parser[String] = acceptMatch("attribute name", {
- case lexical.Identifier(str) => str
- case lexical.Keyword(str) if !lexical.delimiters.contains(str) => str
- })
-
- protected lazy val primary: PackratParser[Expression] =
- ( literal
- | expression ~ ("[" ~> expression <~ "]") ^^
- { case base ~ ordinal => UnresolvedExtractValue(base, ordinal) }
- | (expression <~ ".") ~ ident ^^
- { case base ~ fieldName => UnresolvedExtractValue(base, Literal(fieldName)) }
- | cast
- | "(" ~> expression <~ ")"
- | function
- | dotExpressionHeader
- | signedPrimary
- | "~" ~> expression ^^ BitwiseNot
- | attributeName ^^ UnresolvedAttribute.quoted
- )
-
- protected lazy val dotExpressionHeader: Parser[Expression] =
- (ident <~ ".") ~ ident ~ rep("." ~> ident) ^^ {
- case i1 ~ i2 ~ rest => UnresolvedAttribute(Seq(i1, i2) ++ rest)
- }
-
- protected lazy val tableIdentifier: Parser[TableIdentifier] =
- (ident <~ ".").? ~ ident ^^ {
- case maybeDbName ~ tableName => TableIdentifier(tableName, maybeDbName)
- }
-}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
index e1fd22e367..ec833d6789 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
@@ -447,6 +447,7 @@ object HyperLogLogPlusPlus {
private def validateDoubleLiteral(exp: Expression): Double = exp match {
case Literal(d: Double, DoubleType) => d
+ case Literal(dec: Decimal, _) => dec.toDouble
case _ =>
throw new AnalysisException("The second argument should be a double literal.")
}
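The extra `Literal(dec: Decimal, _)` case widens the accuracy-argument check so that a decimal literal is accepted alongside a double. A hedged restatement of the validation after this patch (the helper name is illustrative):

```scala
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.types.{Decimal, DoubleType}

// Illustrative restatement of validateDoubleLiteral after this change.
def relativeSD(exp: Expression): Double = exp match {
  case Literal(d: Double, DoubleType) => d
  case Literal(dec: Decimal, _)       => dec.toDouble
  case _ =>
    throw new AnalysisException("The second argument should be a double literal.")
}
```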
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
index ba9d2524a9..6d25de98ce 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
@@ -108,6 +108,7 @@ class CatalystQlSuite extends PlanTest {
}
assertRight("9.0e1", 90)
+ assertRight(".9e+2", 90)
assertRight("0.9e+2", 90)
assertRight("900e-1", 90)
assertRight("900.0E-1", 90)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala
deleted file mode 100644
index b0884f5287..0000000000
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst
-
-import org.apache.spark.sql.catalyst.analysis.UnresolvedAlias
-import org.apache.spark.sql.catalyst.expressions.{Attribute, GreaterThan, Literal, Not}
-import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan, OneRowRelation, Project}
-import org.apache.spark.unsafe.types.CalendarInterval
-
-private[sql] case class TestCommand(cmd: String) extends LogicalPlan with Command {
- override def output: Seq[Attribute] = Seq.empty
- override def children: Seq[LogicalPlan] = Seq.empty
-}
-
-private[sql] class SuperLongKeywordTestParser extends AbstractSparkSQLParser {
- protected val EXECUTE = Keyword("THISISASUPERLONGKEYWORDTEST")
-
- override protected lazy val start: Parser[LogicalPlan] = set
-
- private lazy val set: Parser[LogicalPlan] =
- EXECUTE ~> ident ^^ {
- case fileName => TestCommand(fileName)
- }
-}
-
-private[sql] class CaseInsensitiveTestParser extends AbstractSparkSQLParser {
- protected val EXECUTE = Keyword("EXECUTE")
-
- override protected lazy val start: Parser[LogicalPlan] = set
-
- private lazy val set: Parser[LogicalPlan] =
- EXECUTE ~> ident ^^ {
- case fileName => TestCommand(fileName)
- }
-}
-
-class SqlParserSuite extends PlanTest {
-
- test("test long keyword") {
- val parser = new SuperLongKeywordTestParser
- assert(TestCommand("NotRealCommand") ===
- parser.parse("ThisIsASuperLongKeyWordTest NotRealCommand"))
- }
-
- test("test case insensitive") {
- val parser = new CaseInsensitiveTestParser
- assert(TestCommand("NotRealCommand") === parser.parse("EXECUTE NotRealCommand"))
- assert(TestCommand("NotRealCommand") === parser.parse("execute NotRealCommand"))
- assert(TestCommand("NotRealCommand") === parser.parse("exEcute NotRealCommand"))
- }
-
- test("test NOT operator with comparison operations") {
- val parsed = SqlParser.parse("SELECT NOT TRUE > TRUE")
- val expected = Project(
- UnresolvedAlias(
- Not(
- GreaterThan(Literal(true), Literal(true)))
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- test("support hive interval literal") {
- def checkInterval(sql: String, result: CalendarInterval): Unit = {
- val parsed = SqlParser.parse(sql)
- val expected = Project(
- UnresolvedAlias(
- Literal(result)
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- def checkYearMonth(lit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' YEAR TO MONTH",
- CalendarInterval.fromYearMonthString(lit))
- }
-
- def checkDayTime(lit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' DAY TO SECOND",
- CalendarInterval.fromDayTimeString(lit))
- }
-
- def checkSingleUnit(lit: String, unit: String): Unit = {
- checkInterval(
- s"SELECT INTERVAL '$lit' $unit",
- CalendarInterval.fromSingleUnitString(unit, lit))
- }
-
- checkYearMonth("123-10")
- checkYearMonth("496-0")
- checkYearMonth("-2-3")
- checkYearMonth("-123-0")
-
- checkDayTime("99 11:22:33.123456789")
- checkDayTime("-99 11:22:33.123456789")
- checkDayTime("10 9:8:7.123456789")
- checkDayTime("1 0:0:0")
- checkDayTime("-1 0:0:0")
- checkDayTime("1 0:0:1")
-
- for (unit <- Seq("year", "month", "day", "hour", "minute", "second")) {
- checkSingleUnit("7", unit)
- checkSingleUnit("-7", unit)
- checkSingleUnit("0", unit)
- }
-
- checkSingleUnit("13.123456789", "second")
- checkSingleUnit("-13.123456789", "second")
- }
-
- test("support scientific notation") {
- def assertRight(input: String, output: Double): Unit = {
- val parsed = SqlParser.parse("SELECT " + input)
- val expected = Project(
- UnresolvedAlias(
- Literal(output)
- ) :: Nil,
- OneRowRelation)
- comparePlans(parsed, expected)
- }
-
- assertRight("9.0e1", 90)
- assertRight(".9e+2", 90)
- assertRight("0.9e+2", 90)
- assertRight("900e-1", 90)
- assertRight("900.0E-1", 90)
- assertRight("9.e+1", 90)
-
- intercept[RuntimeException](SqlParser.parse("SELECT .e3"))
- }
-}