aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@questtec.nl>2016-03-28 12:31:12 -0700
committerReynold Xin <rxin@databricks.com>2016-03-28 12:31:12 -0700
commit600c0b69cab4767e8e5a6f4284777d8b9d4bd40e (patch)
treebae635ab17a8b58400127f20bbbe5acaecc92f98 /python
parent1528ff4c9affe1df103c4b3abd56a86c71d8b753 (diff)
downloadspark-600c0b69cab4767e8e5a6f4284777d8b9d4bd40e.tar.gz
spark-600c0b69cab4767e8e5a6f4284777d8b9d4bd40e.tar.bz2
spark-600c0b69cab4767e8e5a6f4284777d8b9d4bd40e.zip
[SPARK-13713][SQL] Migrate parser from ANTLR3 to ANTLR4
### What changes were proposed in this pull request? The current ANTLR3 parser is quite complex to maintain and suffers from code blow-ups. This PR introduces a new parser that is based on ANTLR4. This parser is based on [Presto's SQL parser](https://github.com/facebook/presto/blob/master/presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4). The current implementation can parse and create Catalyst and SQL plans. Large parts of the HiveQl DDL and some of the DML functionality are currently missing; the plan is to add these in follow-up PRs. This PR is a work in progress, and work needs to be done in the following areas: - [x] Error handling should be improved. - [x] Documentation should be improved. - [x] Multi-Insert needs to be tested. - [ ] Naming and package locations. ### How was this patch tested? Catalyst and SQL unit tests. Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #11557 from hvanhovell/ngParser.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/tests.py6
-rw-r--r--python/pyspark/sql/utils.py8
2 files changed, 12 insertions, 2 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 83ef76c13c..1a5d422af9 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -51,7 +51,7 @@ from pyspark.sql.types import UserDefinedType, _infer_type
from pyspark.tests import ReusedPySparkTestCase
from pyspark.sql.functions import UserDefinedFunction, sha2
from pyspark.sql.window import Window
-from pyspark.sql.utils import AnalysisException, IllegalArgumentException
+from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
class UTCOffsetTimezone(datetime.tzinfo):
@@ -1130,7 +1130,9 @@ class SQLTests(ReusedPySparkTestCase):
def test_capture_analysis_exception(self):
self.assertRaises(AnalysisException, lambda: self.sqlCtx.sql("select abc"))
self.assertRaises(AnalysisException, lambda: self.df.selectExpr("a + b"))
- self.assertRaises(AnalysisException, lambda: self.sqlCtx.sql("abc"))
+
+ def test_capture_parse_exception(self):
+ self.assertRaises(ParseException, lambda: self.sqlCtx.sql("abc"))
def test_capture_illegalargument_exception(self):
self.assertRaisesRegexp(IllegalArgumentException, "Setting negative mapred.reduce.tasks",
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index b0a0373372..b89ea8c6e0 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -33,6 +33,12 @@ class AnalysisException(CapturedException):
"""
+class ParseException(CapturedException):
+ """
+ Failed to parse a SQL command.
+ """
+
+
class IllegalArgumentException(CapturedException):
"""
Passed an illegal or inappropriate argument.
@@ -49,6 +55,8 @@ def capture_sql_exception(f):
e.java_exception.getStackTrace()))
if s.startswith('org.apache.spark.sql.AnalysisException: '):
raise AnalysisException(s.split(': ', 1)[1], stackTrace)
+ if s.startswith('org.apache.spark.sql.catalyst.parser.ng.ParseException: '):
+ raise ParseException(s.split(': ', 1)[1], stackTrace)
if s.startswith('java.lang.IllegalArgumentException: '):
raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
raise