aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main/java
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@questtec.nl>2016-01-15 15:19:10 -0800
committerReynold Xin <rxin@databricks.com>2016-01-15 15:19:10 -0800
commit7cd7f2202547224593517b392f56e49e4c94cabc (patch)
tree3deb6f260ce94c59d2e25bc29095582dfd637173 /sql/catalyst/src/main/java
parent3f1c58d60b85625ab3abf16456ce27c820453ecf (diff)
downloadspark-7cd7f2202547224593517b392f56e49e4c94cabc.tar.gz
spark-7cd7f2202547224593517b392f56e49e4c94cabc.tar.bz2
spark-7cd7f2202547224593517b392f56e49e4c94cabc.zip
[SPARK-12575][SQL] Grammar parity with existing SQL parser
In this PR the new CatalystQl parser stack reaches grammar parity with the old Parser-Combinator based SQL Parser. This PR also replaces all uses of the old Parser, and removes it from the code base. Although the existing Hive and SQL parser dialects were mostly the same, some kinks had to be worked out: - The SQL Parser allowed syntax like ```APPROXIMATE(0.01) COUNT(DISTINCT a)```. In order to make this work we needed to hardcode approximate operators in the parser, or we would have to create an approximate expression. ```APPROXIMATE_COUNT_DISTINCT(a, 0.01)``` would also do the job and is much easier to maintain. So, this PR **removes** this keyword. - The old SQL Parser supports ```LIMIT``` clauses in nested queries. This is **not supported** anymore. See https://github.com/apache/spark/pull/10689 for the rationale for this. - Hive has a charset name char set literal combination it supports, for instance the following expression ```_ISO-8859-1 0x4341464562616265``` would yield this string: ```CAFEbabe```. Hive will only allow charset names to start with an underscore. This is quite annoying in spark because as soon as you use a tuple names will start with an underscore. In this PR we **remove** this feature from the parser. It would be quite easy to implement such a feature as an Expression later on. - Hive and the SQL Parser treat decimal literals differently. Hive will turn any decimal into a ```Double``` whereas the SQL Parser would convert a non-scientific decimal into a ```BigDecimal```, and would turn a scientific decimal into a Double. We follow Hive's behavior here. The new parser supports a big decimal literal, for instance: ```81923801.42BD```, which can be used when a big decimal is needed. cc rxin viirya marmbrus yhuai cloud-fan Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #10745 from hvanhovell/SPARK-12575-2.
Diffstat (limited to 'sql/catalyst/src/main/java')
-rw-r--r--sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java31
1 files changed, 1 insertions, 30 deletions
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java
index 5bc87b680f..2520c7bb8d 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/parser/ParseUtils.java
@@ -18,12 +18,10 @@
package org.apache.spark.sql.catalyst.parser;
-import java.io.UnsupportedEncodingException;
-
/**
* A couple of utility methods that help with parsing ASTs.
*
- * Both methods in this class were take from the SemanticAnalyzer in Hive:
+ * The 'unescapeSQLString' method in this class was take from the SemanticAnalyzer in Hive:
* ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
*/
public final class ParseUtils {
@@ -31,33 +29,6 @@ public final class ParseUtils {
super();
}
- public static String charSetString(String charSetName, String charSetString)
- throws UnsupportedEncodingException {
- // The character set name starts with a _, so strip that
- charSetName = charSetName.substring(1);
- if (charSetString.charAt(0) == '\'') {
- return new String(unescapeSQLString(charSetString).getBytes(), charSetName);
- } else // hex input is also supported
- {
- assert charSetString.charAt(0) == '0';
- assert charSetString.charAt(1) == 'x';
- charSetString = charSetString.substring(2);
-
- byte[] bArray = new byte[charSetString.length() / 2];
- int j = 0;
- for (int i = 0; i < charSetString.length(); i += 2) {
- int val = Character.digit(charSetString.charAt(i), 16) * 16
- + Character.digit(charSetString.charAt(i + 1), 16);
- if (val > 127) {
- val = val - 256;
- }
- bArray[j++] = (byte)val;
- }
-
- return new String(bArray, charSetName);
- }
- }
-
private static final int[] multiplier = new int[] {1000, 100, 10, 1};
@SuppressWarnings("nls")