[SPARK-3654][SQL] Unifies SQL and HiveQL parsers

This PR is a follow up of #2590, and tries to introduce a top level SQL parser entry point for all SQL dialects supported by Spark SQL. A top level parser `SparkSQLParser` is introduced to handle the syntaxes that all SQL dialects should recognize (e.g. `CACHE TABLE`, `UNCACHE TABLE` and `SET`, etc.). For all the syntaxes this parser doesn't recognize directly, it fallbacks to a specified function that tries to parse arbitrary input to a `LogicalPlan`. This function is typically another parser combinator like `SqlParser`. DDL syntaxes introduced in #2475 can be moved to here. The `ExtendedHiveQlParser` now only handle Hive specific extensions. Also took the chance to refactor/reformat `SqlParser` for better readability. Author: Cheng Lian <lian.cs.zju@gmail.com> Closes #2698 from liancheng/gen-sql-parser and squashes the following commits: ceada76 [Cheng Lian] Minor styling fixes 9738934 [Cheng Lian] Minor refactoring, removes optional trailing ";" in the parser bb2ab12 [Cheng Lian] SET property value can be empty string ce8860b [Cheng Lian] Passes test suites e86968e [Cheng Lian] Removes debugging code 8bcace5 [Cheng Lian] Replaces digit.+ to rep1(digit) (Scala style checking doesn't like it) d15d54f [Cheng Lian] Unifies SQL and HiveQL parsers
author: Cheng Lian <lian.cs.zju@gmail.com> 2014-10-09 18:25:06 -0700
committer: Michael Armbrust <michael@databricks.com> 2014-10-09 18:25:06 -0700
commit: edf02da389f75df5a42465d41f035d6b65599848 (patch)
tree: 1530a6ae85e804ff8ed0498e69a971bc569ee5ca /sql/core/src
parent: 363baacaded56047bcc63276d729ab911e0336cf (diff)
download: spark-edf02da389f75df5a42465d41f035d6b65599848.tar.gz
spark-edf02da389f75df5a42465d41f035d6b65599848.tar.bz2
spark-edf02da389f75df5a42465d41f035d6b65599848.zip
5 files changed, 26 insertions, 27 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 014e1e2826..23e7b2d270 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -66,12 +66,17 @@ class SQLContext(@transient val sparkContext: SparkContext)
   @transient
   protected[sql] lazy val analyzer: Analyzer =
     new Analyzer(catalog, functionRegistry, caseSensitive = true)
+
   @transient
   protected[sql] val optimizer = Optimizer
+
   @transient
-  protected[sql] val parser = new catalyst.SqlParser
+  protected[sql] val sqlParser = {
+    val fallback = new catalyst.SqlParser
+    new catalyst.SparkSQLParser(fallback(_))
+  }
 
-  protected[sql] def parseSql(sql: String): LogicalPlan = parser(sql)
+  protected[sql] def parseSql(sql: String): LogicalPlan = sqlParser(sql)
   protected[sql] def executeSql(sql: String): this.QueryExecution = executePlan(parseSql(sql))
   protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
     new this.QueryExecution { val logical = plan }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index bbf17b9fad..4f1af7234d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -304,8 +304,8 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
 
   case class CommandStrategy(context: SQLContext) extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
-      case logical.SetCommand(key, value) =>
-        Seq(execution.SetCommand(key, value, plan.output)(context))
+      case logical.SetCommand(kv) =>
+        Seq(execution.SetCommand(kv, plan.output)(context))
       case logical.ExplainCommand(logicalPlan, extended) =>
         Seq(execution.ExplainCommand(logicalPlan, plan.output, extended)(context))
       case logical.CacheTableCommand(tableName, optPlan, isLazy) =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index d49633c24a..5859eba408 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -48,29 +48,28 @@ trait Command {
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class SetCommand(
-    key: Option[String], value: Option[String], output: Seq[Attribute])(
+case class SetCommand(kv: Option[(String, Option[String])], output: Seq[Attribute])(
     @transient context: SQLContext)
   extends LeafNode with Command with Logging {
 
-  override protected lazy val sideEffectResult: Seq[Row] = (key, value) match {
-    // Set value for key k.
-    case (Some(k), Some(v)) =>
-      if (k == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
+  override protected lazy val sideEffectResult: Seq[Row] = kv match {
+    // Set value for the key.
+    case Some((key, Some(value))) =>
+      if (key == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
         logWarning(s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " +
           s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS} instead.")
-        context.setConf(SQLConf.SHUFFLE_PARTITIONS, v)
-        Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$v"))
+        context.setConf(SQLConf.SHUFFLE_PARTITIONS, value)
+        Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value"))
       } else {
-        context.setConf(k, v)
-        Seq(Row(s"$k=$v"))
+        context.setConf(key, value)
+        Seq(Row(s"$key=$value"))
       }
 
-    // Query the value bound to key k.
-    case (Some(k), _) =>
+    // Query the value bound to the key.
+    case Some((key, None)) =>
       // TODO (lian) This is just a workaround to make the Simba ODBC driver work.
       // Should remove this once we get the ODBC driver updated.
-      if (k == "-v") {
+      if (key == "-v") {
         val hiveJars = Seq(
           "hive-exec-0.12.0.jar",
           "hive-service-0.12.0.jar",
@@ -84,23 +83,20 @@ case class SetCommand(
           Row("system:java.class.path=" + hiveJars),
           Row("system:sun.java.command=shark.SharkServer2"))
       } else {
-        if (k == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
+        if (key == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
           logWarning(s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " +
             s"showing ${SQLConf.SHUFFLE_PARTITIONS} instead.")
           Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=${context.numShufflePartitions}"))
         } else {
-          Seq(Row(s"$k=${context.getConf(k, "<undefined>")}"))
+          Seq(Row(s"$key=${context.getConf(key, "<undefined>")}"))
         }
       }
 
     // Query all key-value pairs that are set in the SQLConf of the context.
-    case (None, None) =>
+    case _ =>
       context.getAllConfs.map { case (k, v) =>
         Row(s"$k=$v")
       }.toSeq
-
-    case _ =>
-      throw new IllegalArgumentException()
   }
 
   override def otherCopyArgs = context :: Nil
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
index 1e624f9700..c87ded81fd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
@@ -69,7 +69,7 @@ class CachedTableSuite extends QueryTest {
   test("calling .unpersist() should drop in-memory columnar cache") {
     table("testData").cache()
     table("testData").count()
-    table("testData").unpersist(true)
+    table("testData").unpersist(blocking = true)
     assertCached(table("testData"), 0)
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 79de1bb855..a94022c0cf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -42,7 +42,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
     TimeZone.setDefault(origZone)
   }
 
-
   test("SPARK-3176 Added Parser of SQL ABS()") {
     checkAnswer(
       sql("SELECT ABS(-1.3)"),
@@ -61,7 +60,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
       4)
   }
 
-
   test("SPARK-2041 column name equals tablename") {
     checkAnswer(
       sql("SELECT tableName FROM tableName"),
@@ -694,6 +692,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
 
   test("SPARK-3813 CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END") {
     checkAnswer(
-      sql("SELECT CASE WHEN key=1 THEN 1 ELSE 2 END FROM testData WHERE key = 1 group by key"), 1)
+      sql("SELECT CASE WHEN key = 1 THEN 1 ELSE 2 END FROM testData WHERE key = 1 group by key"), 1)
   }
 }
author	Cheng Lian <lian.cs.zju@gmail.com>	2014-10-09 18:25:06 -0700
committer	Michael Armbrust <michael@databricks.com>	2014-10-09 18:25:06 -0700
commit	edf02da389f75df5a42465d41f035d6b65599848 (patch)
tree	1530a6ae85e804ff8ed0498e69a971bc569ee5ca /sql/core/src
parent	363baacaded56047bcc63276d729ab911e0336cf (diff)
download	spark-edf02da389f75df5a42465d41f035d6b65599848.tar.gz spark-edf02da389f75df5a42465d41f035d6b65599848.tar.bz2 spark-edf02da389f75df5a42465d41f035d6b65599848.zip