From edf02da389f75df5a42465d41f035d6b65599848 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Thu, 9 Oct 2014 18:25:06 -0700
Subject: [SPARK-3654][SQL] Unifies SQL and HiveQL parsers

This PR is a follow-up to #2590, and tries to introduce a top-level SQL parser entry point for all SQL dialects supported by Spark SQL.

A top-level parser, `SparkSQLParser`, is introduced to handle the syntax that all SQL dialects should recognize (e.g. `CACHE TABLE`, `UNCACHE TABLE` and `SET`). For any syntax this parser doesn't recognize directly, it falls back to a specified function that tries to parse arbitrary input into a `LogicalPlan`. This function is typically another parser combinator such as `SqlParser`. The DDL syntax introduced in #2475 can be moved here.

The `ExtendedHiveQlParser` now only handles Hive-specific extensions.

Also took the chance to refactor/reformat `SqlParser` for better readability.

Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #2698 from liancheng/gen-sql-parser and squashes the following commits:

ceada76 [Cheng Lian] Minor styling fixes
9738934 [Cheng Lian] Minor refactoring, removes optional trailing ";" in the parser
bb2ab12 [Cheng Lian] SET property value can be empty string
ce8860b [Cheng Lian] Passes test suites
e86968e [Cheng Lian] Removes debugging code
8bcace5 [Cheng Lian] Replaces digit.+ to rep1(digit) (Scala style checking doesn't like it)
d15d54f [Cheng Lian] Unifies SQL and HiveQL parsers
---
 .../apache/spark/sql/catalyst/SparkSQLParser.scala | 186 +++++++++
 .../org/apache/spark/sql/catalyst/SqlParser.scala  | 426 ++++++++-------------
 .../sql/catalyst/plans/logical/commands.scala      |  15 +-
 3 files changed, 355 insertions(+), 272 deletions(-)
 create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala

(limited to 'sql/catalyst/src/main/scala/org/apache')

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala
new file mode 100644
index 0000000000..04467342e6
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import scala.language.implicitConversions
+import scala.util.parsing.combinator.lexical.StdLexical
+import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+import scala.util.parsing.combinator.{PackratParsers, RegexParsers}
+import scala.util.parsing.input.CharArrayReader.EofCh
+
+import org.apache.spark.sql.catalyst.plans.logical._
+
+private[sql] abstract class AbstractSparkSQLParser
+  extends StandardTokenParsers with PackratParsers {
+
+  def apply(input: String): LogicalPlan = phrase(start)(new lexical.Scanner(input)) match {
+    case Success(plan, _) => plan
+    case failureOrError => sys.error(failureOrError.toString)
+  }
+
+  protected case class Keyword(str: String)
+
+  protected def start: Parser[LogicalPlan]
+
+  // Returns the whole input string
+  protected lazy val wholeInput: Parser[String] = new Parser[String] {
+    def apply(in: Input): ParseResult[String] =
+      Success(in.source.toString, in.drop(in.source.length()))
+  }
+
+  // Returns the rest of the input string that is not parsed yet
+  protected lazy val restInput: Parser[String] = new Parser[String] {
+    def apply(in: Input): ParseResult[String] =
+      Success(
+        in.source.subSequence(in.offset, in.source.length()).toString,
+        in.drop(in.source.length()))
+  }
+}
+
+class SqlLexical(val keywords: Seq[String]) extends StdLexical {
+  case class FloatLit(chars: String) extends Token {
+    override def toString = chars
+  }
+
+  reserved ++= keywords.flatMap(w => allCaseVersions(w))
+
+  delimiters += (
+    "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
+    ",", ";", "%", "{", "}", ":", "[", "]", "."
+  )
+
+  override lazy val token: Parser[Token] =
+    ( identChar ~ (identChar | digit).* ^^
+      { case first ~ rest => processIdent((first :: rest).mkString) }
+    | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
+        case i ~ None    => NumericLit(i.mkString)
+        case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString)
+      }
+    | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
+      { case chars => StringLit(chars mkString "") }
+    | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
+      { case chars => StringLit(chars mkString "") }
+    | EofCh ^^^ EOF
+    | '\'' ~> failure("unclosed string literal")
+    | '"' ~> failure("unclosed string literal")
+    | delim
+    | failure("illegal character")
+    )
+
+  override def identChar = letter | elem('_')
+
+  override def whitespace: Parser[Any] =
+    ( whitespaceChar
+    | '/' ~ '*' ~ comment
+    | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
+    | '#' ~ chrExcept(EofCh, '\n').*
+    | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
+    | '/' ~ '*' ~ failure("unclosed comment")
+    ).*
+
+  /** Generate all variations of upper and lower case of a given string */
+  def allCaseVersions(s: String, prefix: String = ""): Stream[String] = {
+    if (s == "") {
+      Stream(prefix)
+    } else {
+      allCaseVersions(s.tail, prefix + s.head.toLower) ++
+        allCaseVersions(s.tail, prefix + s.head.toUpper)
+    }
+  }
+}
+
+/**
+ * The top level Spark SQL parser. This parser recognizes syntaxes that are available for all SQL
+ * dialects supported by Spark SQL, and delegates all the other syntaxes to the `fallback` parser.
+ *
+ * @param fallback A function that parses an input string to a logical plan
+ */
+private[sql] class SparkSQLParser(fallback: String => LogicalPlan) extends AbstractSparkSQLParser {
+
+  // A parser for the key-value part of the "SET [key = [value ]]" syntax
+  private object SetCommandParser extends RegexParsers {
+    private val key: Parser[String] = "(?m)[^=]+".r
+
+    private val value: Parser[String] = "(?m).*$".r
+
+    private val pair: Parser[LogicalPlan] =
+      (key ~ ("=".r ~> value).?).? ^^ {
+        case None => SetCommand(None)
+        case Some(k ~ v) => SetCommand(Some(k.trim -> v.map(_.trim)))
+      }
+
+    def apply(input: String): LogicalPlan = parseAll(pair, input) match {
+      case Success(plan, _) => plan
+      case x => sys.error(x.toString)
+    }
+  }
+
+  protected val AS      = Keyword("AS")
+  protected val CACHE   = Keyword("CACHE")
+  protected val LAZY    = Keyword("LAZY")
+  protected val SET     = Keyword("SET")
+  protected val TABLE   = Keyword("TABLE")
+  protected val SOURCE  = Keyword("SOURCE")
+  protected val UNCACHE = Keyword("UNCACHE")
+
+  protected implicit def asParser(k: Keyword): Parser[String] =
+    lexical.allCaseVersions(k.str).map(x => x : Parser[String]).reduce(_ | _)
+
+  private val reservedWords: Seq[String] =
+    this
+      .getClass
+      .getMethods
+      .filter(_.getReturnType == classOf[Keyword])
+      .map(_.invoke(this).asInstanceOf[Keyword].str)
+
+  override val lexical = new SqlLexical(reservedWords)
+
+  override protected lazy val start: Parser[LogicalPlan] =
+    cache | uncache | set | shell | source | others
+
+  private lazy val cache: Parser[LogicalPlan] =
+    CACHE ~> LAZY.? ~ (TABLE ~> ident) ~ (AS ~> restInput).? ^^ {
+      case isLazy ~ tableName ~ plan =>
+        CacheTableCommand(tableName, plan.map(fallback), isLazy.isDefined)
+    }
+
+  private lazy val uncache: Parser[LogicalPlan] =
+    UNCACHE ~ TABLE ~> ident ^^ {
+      case tableName => UncacheTableCommand(tableName)
+    }
+
+  private lazy val set: Parser[LogicalPlan] =
+    SET ~> restInput ^^ {
+      case input => SetCommandParser(input)
+    }
+
+  private lazy val shell: Parser[LogicalPlan] =
+    "!" ~> restInput ^^ {
+      case input => ShellCommand(input.trim)
+    }
+
+  private lazy val source: Parser[LogicalPlan] =
+    SOURCE ~> restInput ^^ {
+      case input => SourceCommand(input.trim)
+    }
+
+  private lazy val others: Parser[LogicalPlan] =
+    wholeInput ^^ {
+      case input => fallback(input)
+    }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 4662f585cf..b4d606d37e 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -18,10 +18,6 @@
 package org.apache.spark.sql.catalyst
 
 import scala.language.implicitConversions
-import scala.util.parsing.combinator.lexical.StdLexical
-import scala.util.parsing.combinator.syntactical.StandardTokenParsers
-import scala.util.parsing.combinator.PackratParsers
-import scala.util.parsing.input.CharArrayReader.EofCh
 
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
@@ -39,31 +35,7 @@ import org.apache.spark.sql.catalyst.types._
  * This is currently included mostly for illustrative purposes.  Users wanting more complete support
  * for a SQL like language should checkout the HiveQL support in the sql/hive sub-project.
  */
-class SqlParser extends StandardTokenParsers with PackratParsers {
-
-  def apply(input: String): LogicalPlan = {
-    // Special-case out set commands since the value fields can be
-    // complex to handle without RegexParsers. Also this approach
-    // is clearer for the several possible cases of set commands.
-    if (input.trim.toLowerCase.startsWith("set")) {
-      input.trim.drop(3).split("=", 2).map(_.trim) match {
-        case Array("") => // "set"
-          SetCommand(None, None)
-        case Array(key) => // "set key"
-          SetCommand(Some(key), None)
-        case Array(key, value) => // "set key=value"
-          SetCommand(Some(key), Some(value))
-      }
-    } else {
-      phrase(query)(new lexical.Scanner(input)) match {
-        case Success(r, x) => r
-        case x => sys.error(x.toString)
-      }
-    }
-  }
-
-  protected case class Keyword(str: String)
-
+class SqlParser extends AbstractSparkSQLParser {
   protected implicit def asParser(k: Keyword): Parser[String] =
     lexical.allCaseVersions(k.str).map(x => x : Parser[String]).reduce(_ | _)
 
@@ -100,7 +72,6 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val IS = Keyword("IS")
   protected val JOIN = Keyword("JOIN")
   protected val LAST = Keyword("LAST")
-  protected val LAZY = Keyword("LAZY")
   protected val LEFT = Keyword("LEFT")
   protected val LIKE = Keyword("LIKE")
   protected val LIMIT = Keyword("LIMIT")
@@ -128,7 +99,6 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val THEN = Keyword("THEN")
   protected val TIMESTAMP = Keyword("TIMESTAMP")
   protected val TRUE = Keyword("TRUE")
-  protected val UNCACHE = Keyword("UNCACHE")
   protected val UNION = Keyword("UNION")
   protected val UPPER = Keyword("UPPER")
   protected val WHEN = Keyword("WHEN")
@@ -136,7 +106,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
 
   // Use reflection to find the reserved words defined in this class.
   protected val reservedWords =
-    this.getClass
+    this
+      .getClass
       .getMethods
       .filter(_.getReturnType == classOf[Keyword])
       .map(_.invoke(this).asInstanceOf[Keyword].str)
@@ -150,86 +121,68 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     }
   }
 
-  protected lazy val query: Parser[LogicalPlan] = (
-    select * (
-        UNION ~ ALL ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Union(q1, q2) } |
-        INTERSECT ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Intersect(q1, q2) } |
-        EXCEPT ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Except(q1, q2)} |
-        UNION ~ opt(DISTINCT) ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Distinct(Union(q1, q2)) }
+  protected lazy val start: Parser[LogicalPlan] =
+    ( select *
+      ( UNION ~ ALL        ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Union(q1, q2) }
+      | INTERSECT          ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Intersect(q1, q2) }
+      | EXCEPT             ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Except(q1, q2)}
+      | UNION ~ DISTINCT.? ^^^ { (q1: LogicalPlan, q2: LogicalPlan) => Distinct(Union(q1, q2)) }
       )
-    | insert | cache | unCache
-  )
+    | insert
+    )
 
   protected lazy val select: Parser[LogicalPlan] =
-    SELECT ~> opt(DISTINCT) ~ projections ~
-    opt(from) ~ opt(filter) ~
-    opt(grouping) ~
-    opt(having) ~
-    opt(orderBy) ~
-    opt(limit) <~ opt(";") ^^ {
-      case d ~ p ~ r ~ f ~ g ~ h ~ o ~ l  =>
-        val base = r.getOrElse(NoRelation)
-        val withFilter = f.map(f => Filter(f, base)).getOrElse(base)
-        val withProjection =
-          g.map {g =>
-            Aggregate(g, assignAliases(p), withFilter)
-          }.getOrElse(Project(assignAliases(p), withFilter))
-        val withDistinct = d.map(_ => Distinct(withProjection)).getOrElse(withProjection)
-        val withHaving = h.map(h => Filter(h, withDistinct)).getOrElse(withDistinct)
-        val withOrder = o.map(o => Sort(o, withHaving)).getOrElse(withHaving)
-        val withLimit = l.map { l => Limit(l, withOrder) }.getOrElse(withOrder)
-        withLimit
-  }
+    SELECT ~> DISTINCT.? ~
+      repsep(projection, ",") ~
+      (FROM   ~> relations).? ~
+      (WHERE  ~> expression).? ~
+      (GROUP  ~  BY ~> rep1sep(expression, ",")).? ~
+      (HAVING ~> expression).? ~
+      (ORDER  ~  BY ~> ordering).? ~
+      (LIMIT  ~> expression).? ^^ {
+        case d ~ p ~ r ~ f ~ g ~ h ~ o ~ l  =>
+          val base = r.getOrElse(NoRelation)
+          val withFilter = f.map(f => Filter(f, base)).getOrElse(base)
+          val withProjection = g
+            .map(Aggregate(_, assignAliases(p), withFilter))
+            .getOrElse(Project(assignAliases(p), withFilter))
+          val withDistinct = d.map(_ => Distinct(withProjection)).getOrElse(withProjection)
+          val withHaving = h.map(Filter(_, withDistinct)).getOrElse(withDistinct)
+          val withOrder = o.map(Sort(_, withHaving)).getOrElse(withHaving)
+          val withLimit = l.map(Limit(_, withOrder)).getOrElse(withOrder)
+          withLimit
+      }
 
   protected lazy val insert: Parser[LogicalPlan] =
-    INSERT ~> opt(OVERWRITE) ~ inTo ~ select <~ opt(";") ^^ {
-      case o ~ r ~ s =>
-        val overwrite: Boolean = o.getOrElse("") == "OVERWRITE"
-        InsertIntoTable(r, Map[String, Option[String]](), s, overwrite)
-    }
-
-  protected lazy val cache: Parser[LogicalPlan] =
-    CACHE ~> opt(LAZY) ~ (TABLE ~> ident) ~ opt(AS ~> select) <~ opt(";") ^^ {
-      case isLazy ~ tableName ~ plan =>
-        CacheTableCommand(tableName, plan, isLazy.isDefined)
-    }
-
-  protected lazy val unCache: Parser[LogicalPlan] =
-    UNCACHE ~ TABLE ~> ident <~ opt(";") ^^ {
-      case tableName => UncacheTableCommand(tableName)
+    INSERT ~> OVERWRITE.? ~ (INTO ~> relation) ~ select ^^ {
+      case o ~ r ~ s => InsertIntoTable(r, Map.empty[String, Option[String]], s, o.isDefined)
     }
 
-  protected lazy val projections: Parser[Seq[Expression]] = repsep(projection, ",")
-
   protected lazy val projection: Parser[Expression] =
-    expression ~ (opt(AS) ~> opt(ident)) ^^ {
-      case e ~ None => e
-      case e ~ Some(a) => Alias(e, a)()
+    expression ~ (AS.? ~> ident.?) ^^ {
+      case e ~ a => a.fold(e)(Alias(e, _)())
     }
 
-  protected lazy val from: Parser[LogicalPlan] = FROM ~> relations
-
-  protected lazy val inTo: Parser[LogicalPlan] = INTO ~> relation
-
   // Based very loosely on the MySQL Grammar.
   // http://dev.mysql.com/doc/refman/5.0/en/join.html
   protected lazy val relations: Parser[LogicalPlan] =
-    relation ~ "," ~ relation ^^ { case r1 ~ _ ~ r2 => Join(r1, r2, Inner, None) } |
-    relation
+    ( relation ~ ("," ~> relation) ^^ { case r1 ~ r2 => Join(r1, r2, Inner, None) }
+    | relation
+    )
 
   protected lazy val relation: Parser[LogicalPlan] =
-    joinedRelation |
-    relationFactor
+    joinedRelation | relationFactor
 
   protected lazy val relationFactor: Parser[LogicalPlan] =
-    ident ~ (opt(AS) ~> opt(ident)) ^^ {
-      case tableName ~ alias => UnresolvedRelation(None, tableName, alias)
-    } |
-    "(" ~> query ~ ")" ~ opt(AS) ~ ident ^^ { case s ~ _ ~ _ ~ a => Subquery(a, s) }
+    ( ident ~ (opt(AS) ~> opt(ident)) ^^ {
+        case tableName ~ alias => UnresolvedRelation(None, tableName, alias)
+      }
+    | ("(" ~> start <~ ")") ~ (AS.? ~> ident) ^^ { case s ~ a => Subquery(a, s) }
+    )
 
   protected lazy val joinedRelation: Parser[LogicalPlan] =
-    relationFactor ~ opt(joinType) ~ JOIN ~ relationFactor ~ opt(joinConditions) ^^ {
-      case r1 ~ jt ~ _ ~ r2 ~ cond =>
+    relationFactor ~ joinType.? ~ (JOIN ~> relationFactor) ~ joinConditions.? ^^ {
+      case r1 ~ jt ~ r2 ~ cond =>
         Join(r1, r2, joinType = jt.getOrElse(Inner), cond)
     }
 
@@ -237,160 +190,145 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     ON ~> expression
 
   protected lazy val joinType: Parser[JoinType] =
-    INNER ^^^ Inner |
-    LEFT ~ SEMI ^^^ LeftSemi |
-    LEFT ~ opt(OUTER) ^^^ LeftOuter |
-    RIGHT ~ opt(OUTER) ^^^ RightOuter |
-    FULL ~ opt(OUTER) ^^^ FullOuter
-
-  protected lazy val filter: Parser[Expression] = WHERE ~ expression ^^ { case _ ~ e => e }
-
-  protected lazy val orderBy: Parser[Seq[SortOrder]] =
-    ORDER ~> BY ~> ordering
+    ( INNER           ^^^ Inner
+    | LEFT  ~ SEMI    ^^^ LeftSemi
+    | LEFT  ~ OUTER.? ^^^ LeftOuter
+    | RIGHT ~ OUTER.? ^^^ RightOuter
+    | FULL  ~ OUTER.? ^^^ FullOuter
+    )
 
   protected lazy val ordering: Parser[Seq[SortOrder]] =
-    rep1sep(singleOrder, ",") |
-    rep1sep(expression, ",") ~ opt(direction) ^^ {
-      case exps ~ None => exps.map(SortOrder(_, Ascending))
-      case exps ~ Some(d) => exps.map(SortOrder(_, d))
-    }
+    ( rep1sep(singleOrder, ",")
+    | rep1sep(expression, ",") ~ direction.? ^^ {
+        case exps ~ d => exps.map(SortOrder(_, d.getOrElse(Ascending)))
+      }
+    )
 
   protected lazy val singleOrder: Parser[SortOrder] =
-    expression ~ direction ^^ { case e ~ o => SortOrder(e,o) }
+    expression ~ direction ^^ { case e ~ o => SortOrder(e, o) }
 
   protected lazy val direction: Parser[SortDirection] =
-    ASC ^^^ Ascending |
-    DESC ^^^ Descending
-
-  protected lazy val grouping: Parser[Seq[Expression]] =
-    GROUP ~> BY ~> rep1sep(expression, ",")
-
-  protected lazy val having: Parser[Expression] =
-    HAVING ~> expression
-
-  protected lazy val limit: Parser[Expression] =
-    LIMIT ~> expression
+    ( ASC  ^^^ Ascending
+    | DESC ^^^ Descending
+    )
 
-  protected lazy val expression: Parser[Expression] = orExpression
+  protected lazy val expression: Parser[Expression] =
+    orExpression
 
   protected lazy val orExpression: Parser[Expression] =
-    andExpression * (OR ^^^ { (e1: Expression, e2: Expression) => Or(e1,e2) })
+    andExpression * (OR ^^^ { (e1: Expression, e2: Expression) => Or(e1, e2) })
 
   protected lazy val andExpression: Parser[Expression] =
-    comparisonExpression * (AND ^^^ { (e1: Expression, e2: Expression) => And(e1,e2) })
+    comparisonExpression * (AND ^^^ { (e1: Expression, e2: Expression) => And(e1, e2) })
 
   protected lazy val comparisonExpression: Parser[Expression] =
-    termExpression ~ "=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => EqualTo(e1, e2) } |
-    termExpression ~ "<" ~ termExpression ^^ { case e1 ~ _ ~ e2 => LessThan(e1, e2) } |
-    termExpression ~ "<=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => LessThanOrEqual(e1, e2) } |
-    termExpression ~ ">" ~ termExpression ^^ { case e1 ~ _ ~ e2 => GreaterThan(e1, e2) } |
-    termExpression ~ ">=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => GreaterThanOrEqual(e1, e2) } |
-    termExpression ~ "!=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(EqualTo(e1, e2)) } |
-    termExpression ~ "<>" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(EqualTo(e1, e2)) } |
-    termExpression ~ BETWEEN ~ termExpression ~ AND ~ termExpression ^^ {
-      case e ~ _ ~ el ~ _  ~ eu => And(GreaterThanOrEqual(e, el), LessThanOrEqual(e, eu))
-    } |
-    termExpression ~ RLIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } |
-    termExpression ~ REGEXP ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } |
-    termExpression ~ LIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => Like(e1, e2) } |
-    termExpression ~ IN ~ "(" ~ rep1sep(termExpression, ",") <~ ")" ^^ {
-      case e1 ~ _ ~ _ ~ e2 => In(e1, e2)
-    } |
-    termExpression ~ NOT ~ IN ~ "(" ~ rep1sep(termExpression, ",") <~ ")" ^^ {
-      case e1 ~ _ ~ _ ~ _ ~ e2 => Not(In(e1, e2))
-    } |
-    termExpression <~ IS ~ NULL ^^ { case e => IsNull(e) } |
-    termExpression <~ IS ~ NOT ~ NULL ^^ { case e => IsNotNull(e) } |
-    NOT ~> termExpression ^^ {e => Not(e)} |
-    termExpression
+    ( termExpression ~ ("="  ~> termExpression) ^^ { case e1 ~ e2 => EqualTo(e1, e2) }
+    | termExpression ~ ("<"  ~> termExpression) ^^ { case e1 ~ e2 => LessThan(e1, e2) }
+    | termExpression ~ ("<=" ~> termExpression) ^^ { case e1 ~ e2 => LessThanOrEqual(e1, e2) }
+    | termExpression ~ (">"  ~> termExpression) ^^ { case e1 ~ e2 => GreaterThan(e1, e2) }
+    | termExpression ~ (">=" ~> termExpression) ^^ { case e1 ~ e2 => GreaterThanOrEqual(e1, e2) }
+    | termExpression ~ ("!=" ~> termExpression) ^^ { case e1 ~ e2 => Not(EqualTo(e1, e2)) }
+    | termExpression ~ ("<>" ~> termExpression) ^^ { case e1 ~ e2 => Not(EqualTo(e1, e2)) }
+    | termExpression ~ (BETWEEN ~> termExpression) ~ (AND ~> termExpression) ^^ {
+        case e ~ el ~ eu => And(GreaterThanOrEqual(e, el), LessThanOrEqual(e, eu))
+      }
+    | termExpression ~ (RLIKE  ~> termExpression) ^^ { case e1 ~ e2 => RLike(e1, e2) }
+    | termExpression ~ (REGEXP ~> termExpression) ^^ { case e1 ~ e2 => RLike(e1, e2) }
+    | termExpression ~ (LIKE   ~> termExpression) ^^ { case e1 ~ e2 => Like(e1, e2) }
+    | termExpression ~ (IN ~ "(" ~> rep1sep(termExpression, ",")) <~ ")" ^^ {
+        case e1 ~ e2 => In(e1, e2)
+      }
+    | termExpression ~ (NOT ~ IN ~ "(" ~> rep1sep(termExpression, ",")) <~ ")" ^^ {
+        case e1 ~ e2 => Not(In(e1, e2))
+      }
+    | termExpression <~ IS ~ NULL ^^ { case e => IsNull(e) }
+    | termExpression <~ IS ~ NOT ~ NULL ^^ { case e => IsNotNull(e) }
+    | NOT ~> termExpression ^^ {e => Not(e)}
+    | termExpression
+    )
 
   protected lazy val termExpression: Parser[Expression] =
-    productExpression * (
-      "+" ^^^ { (e1: Expression, e2: Expression) => Add(e1,e2) } |
-      "-" ^^^ { (e1: Expression, e2: Expression) => Subtract(e1,e2) } )
+    productExpression *
+      ( "+" ^^^ { (e1: Expression, e2: Expression) => Add(e1, e2) }
+      | "-" ^^^ { (e1: Expression, e2: Expression) => Subtract(e1, e2) }
+      )
 
   protected lazy val productExpression: Parser[Expression] =
-    baseExpression * (
-      "*" ^^^ { (e1: Expression, e2: Expression) => Multiply(e1,e2) } |
-      "/" ^^^ { (e1: Expression, e2: Expression) => Divide(e1,e2) } |
-      "%" ^^^ { (e1: Expression, e2: Expression) => Remainder(e1,e2) }
-    )
+    baseExpression *
+      ( "*" ^^^ { (e1: Expression, e2: Expression) => Multiply(e1, e2) }
+      | "/" ^^^ { (e1: Expression, e2: Expression) => Divide(e1, e2) }
+      | "%" ^^^ { (e1: Expression, e2: Expression) => Remainder(e1, e2) }
+      )
 
   protected lazy val function: Parser[Expression] =
-    SUM ~> "(" ~> expression <~ ")" ^^ { case exp => Sum(exp) } |
-    SUM ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => SumDistinct(exp) } |
-    COUNT ~> "(" ~ "*" <~ ")" ^^ { case _ => Count(Literal(1)) } |
-    COUNT ~> "(" ~ expression <~ ")" ^^ { case dist ~ exp => Count(exp) } |
-    COUNT ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => CountDistinct(exp :: Nil) } |
-    APPROXIMATE ~> COUNT ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ {
-      case exp => ApproxCountDistinct(exp)
-    } |
-    APPROXIMATE ~> "(" ~> floatLit ~ ")" ~ COUNT ~ "(" ~ DISTINCT ~ expression <~ ")" ^^ {
-      case s ~ _ ~ _ ~ _ ~ _ ~ e => ApproxCountDistinct(e, s.toDouble)
-    } |
-    FIRST ~> "(" ~> expression <~ ")" ^^ { case exp => First(exp) } |
-    LAST ~> "(" ~> expression <~ ")" ^^ { case exp => Last(exp) } |
-    AVG ~> "(" ~> expression <~ ")" ^^ { case exp => Average(exp) } |
-    MIN ~> "(" ~> expression <~ ")" ^^ { case exp => Min(exp) } |
-    MAX ~> "(" ~> expression <~ ")" ^^ { case exp => Max(exp) } |
-    UPPER ~> "(" ~> expression <~ ")" ^^ { case exp => Upper(exp) } |
-    LOWER ~> "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) } |
-    IF ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
-      case c ~ "," ~ t ~ "," ~ f => If(c,t,f)
-    } |
-    CASE ~> expression.? ~ (WHEN ~> expression ~ (THEN ~> expression)).* ~
-      (ELSE ~> expression).? <~ END ^^ {
-       case casePart ~ altPart ~ elsePart =>
-         val altExprs = altPart.flatMap {
-           case we ~ te =>
-             Seq(casePart.fold(we)(EqualTo(_, we)), te)
+    ( SUM   ~> "(" ~> expression             <~ ")" ^^ { case exp => Sum(exp) }
+    | SUM   ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => SumDistinct(exp) }
+    | COUNT ~  "(" ~> "*"                    <~ ")" ^^ { case _ => Count(Literal(1)) }
+    | COUNT ~  "(" ~> expression             <~ ")" ^^ { case exp => Count(exp) }
+    | COUNT ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => CountDistinct(exp :: Nil) }
+    | APPROXIMATE ~ COUNT ~ "(" ~ DISTINCT ~> expression <~ ")" ^^
+      { case exp => ApproxCountDistinct(exp) }
+    | APPROXIMATE ~> "(" ~> floatLit ~ ")" ~ COUNT ~ "(" ~ DISTINCT ~ expression <~ ")" ^^
+      { case s ~ _ ~ _ ~ _ ~ _ ~ e => ApproxCountDistinct(e, s.toDouble) }
+    | FIRST ~ "(" ~> expression <~ ")" ^^ { case exp => First(exp) }
+    | LAST  ~ "(" ~> expression <~ ")" ^^ { case exp => Last(exp) }
+    | AVG   ~ "(" ~> expression <~ ")" ^^ { case exp => Average(exp) }
+    | MIN   ~ "(" ~> expression <~ ")" ^^ { case exp => Min(exp) }
+    | MAX   ~ "(" ~> expression <~ ")" ^^ { case exp => Max(exp) }
+    | UPPER ~ "(" ~> expression <~ ")" ^^ { case exp => Upper(exp) }
+    | LOWER ~ "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) }
+    | IF ~ "(" ~> expression ~ ("," ~> expression) ~ ("," ~> expression) <~ ")" ^^
+      { case c ~ t ~ f => If(c, t, f) }
+    | CASE ~> expression.? ~ (WHEN ~> expression ~ (THEN ~> expression)).* ~
+        (ELSE ~> expression).? <~ END ^^ {
+          case casePart ~ altPart ~ elsePart =>
+            val altExprs = altPart.flatMap { case whenExpr ~ thenExpr =>
+              Seq(casePart.fold(whenExpr)(EqualTo(_, whenExpr)), thenExpr)
+            }
+            CaseWhen(altExprs ++ elsePart.toList)
         }
-        CaseWhen(altExprs ++ elsePart.toList)
-    } |
-    (SUBSTR | SUBSTRING) ~> "(" ~> expression ~ "," ~ expression <~ ")" ^^ {
-      case s ~ "," ~ p => Substring(s,p,Literal(Integer.MAX_VALUE))
-    } |
-    (SUBSTR | SUBSTRING) ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
-      case s ~ "," ~ p ~ "," ~ l => Substring(s,p,l)
-    } |
-    SQRT ~> "(" ~> expression <~ ")" ^^ { case exp => Sqrt(exp) } |
-    ABS ~> "(" ~> expression <~ ")" ^^ { case exp => Abs(exp) } |
-    ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ {
-      case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs)
-    }
+    | (SUBSTR | SUBSTRING) ~ "(" ~> expression ~ ("," ~> expression) <~ ")" ^^
+      { case s ~ p => Substring(s, p, Literal(Integer.MAX_VALUE)) }
+    | (SUBSTR | SUBSTRING) ~ "(" ~> expression ~ ("," ~> expression) ~ ("," ~> expression) <~ ")" ^^
+      { case s ~ p ~ l => Substring(s, p, l) }
+    | SQRT  ~ "(" ~> expression <~ ")" ^^ { case exp => Sqrt(exp) }
+    | ABS   ~ "(" ~> expression <~ ")" ^^ { case exp => Abs(exp) }
+    | ident ~ ("(" ~> repsep(expression, ",")) <~ ")" ^^
+      { case udfName ~ exprs => UnresolvedFunction(udfName, exprs) }
+    )
 
   protected lazy val cast: Parser[Expression] =
-    CAST ~> "(" ~> expression ~ AS ~ dataType <~ ")" ^^ { case exp ~ _ ~ t => Cast(exp, t) }
+    CAST ~ "(" ~> expression ~ (AS ~> dataType) <~ ")" ^^ { case exp ~ t => Cast(exp, t) }
 
   protected lazy val literal: Parser[Literal] =
-    numericLit ^^ {
-      case i if i.toLong > Int.MaxValue => Literal(i.toLong)
-      case i => Literal(i.toInt)
-    } |
-    NULL ^^^ Literal(null, NullType) |
-    floatLit ^^ {case f => Literal(f.toDouble) } |
-    stringLit ^^ {case s => Literal(s, StringType) }
+    ( numericLit ^^ {
+        case i if i.toLong > Int.MaxValue => Literal(i.toLong)
+        case i => Literal(i.toInt)
+      }
+    | NULL ^^^ Literal(null, NullType)
+    | floatLit ^^ {case f => Literal(f.toDouble) }
+    | stringLit ^^ {case s => Literal(s, StringType) }
+    )
 
   protected lazy val floatLit: Parser[String] =
     elem("decimal", _.isInstanceOf[lexical.FloatLit]) ^^ (_.chars)
 
   protected lazy val baseExpression: PackratParser[Expression] =
-    expression ~ "[" ~ expression <~ "]" ^^ {
-      case base ~ _ ~ ordinal => GetItem(base, ordinal)
-    } |
-    (expression <~ ".") ~ ident ^^ {
-      case base ~ fieldName => GetField(base, fieldName)
-    } |
-    TRUE ^^^ Literal(true, BooleanType) |
-    FALSE ^^^ Literal(false, BooleanType) |
-    cast |
-    "(" ~> expression <~ ")" |
-    function |
-    "-" ~> literal ^^ UnaryMinus |
-    dotExpressionHeader |
-    ident ^^ UnresolvedAttribute |
-    "*" ^^^ Star(None) |
-    literal
+    ( expression ~ ("[" ~> expression <~ "]") ^^
+      { case base ~ ordinal => GetItem(base, ordinal) }
+    | (expression <~ ".") ~ ident ^^
+      { case base ~ fieldName => GetField(base, fieldName) }
+    | TRUE  ^^^ Literal(true, BooleanType)
+    | FALSE ^^^ Literal(false, BooleanType)
+    | cast
+    | "(" ~> expression <~ ")"
+    | function
+    | "-" ~> literal ^^ UnaryMinus
+    | dotExpressionHeader
+    | ident ^^ UnresolvedAttribute
+    | "*" ^^^ Star(None)
+    | literal
+    )
 
   protected lazy val dotExpressionHeader: Parser[Expression] =
     (ident <~ ".") ~ ident ~ rep("." ~> ident) ^^ {
@@ -400,55 +338,3 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected lazy val dataType: Parser[DataType] =
     STRING ^^^ StringType | TIMESTAMP ^^^ TimestampType
 }
-
-class SqlLexical(val keywords: Seq[String]) extends StdLexical {
-  case class FloatLit(chars: String) extends Token {
-    override def toString = chars
-  }
-
-  reserved ++= keywords.flatMap(w => allCaseVersions(w))
-
-  delimiters += (
-      "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
-      ",", ";", "%", "{", "}", ":", "[", "]", "."
-  )
-
-  override lazy val token: Parser[Token] = (
-    identChar ~ rep( identChar | digit ) ^^
-      { case first ~ rest => processIdent(first :: rest mkString "") }
-      | rep1(digit) ~ opt('.' ~> rep(digit)) ^^ {
-      case i ~ None    => NumericLit(i mkString "")
-      case i ~ Some(d) => FloatLit(i.mkString("") + "." + d.mkString(""))
-    }
-      | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^
-      { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") }
-      | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^
-      { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") }
-      | EofCh ^^^ EOF
-      | '\'' ~> failure("unclosed string literal")
-      | '\"' ~> failure("unclosed string literal")
-      | delim
-      | failure("illegal character")
-    )
-
-  override def identChar = letter | elem('_')
-
-  override def whitespace: Parser[Any] = rep(
-    whitespaceChar
-      | '/' ~ '*' ~ comment
-      | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') )
-      | '#' ~ rep( chrExcept(EofCh, '\n') )
-      | '-' ~ '-' ~ rep( chrExcept(EofCh, '\n') )
-      | '/' ~ '*' ~ failure("unclosed comment")
-  )
-
-  /** Generate all variations of upper and lower case of a given string */
-  def allCaseVersions(s: String, prefix: String = ""): Stream[String] = {
-    if (s == "") {
-      Stream(prefix)
-    } else {
-      allCaseVersions(s.tail, prefix + s.head.toLower) ++
-        allCaseVersions(s.tail, prefix + s.head.toUpper)
-    }
-  }
-}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
index 9a3848cfc6..b8ba2ee428 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
@@ -39,9 +39,9 @@ case class NativeCommand(cmd: String) extends Command {
 }
 
 /**
- * Commands of the form "SET (key) (= value)".
+ * Commands of the form "SET [key [= value] ]".
  */
-case class SetCommand(key: Option[String], value: Option[String]) extends Command {
+case class SetCommand(kv: Option[(String, Option[String])]) extends Command {
   override def output = Seq(
     AttributeReference("", StringType, nullable = false)())
 }
@@ -81,3 +81,14 @@ case class DescribeCommand(
     AttributeReference("data_type", StringType, nullable = false)(),
     AttributeReference("comment", StringType, nullable = false)())
 }
+
+/**
+ * Returned for the "! shellCommand" command
+ */
+case class ShellCommand(cmd: String) extends Command
+
+
+/**
+ * Returned for the "SOURCE file" command
+ */
+case class SourceCommand(filePath: String) extends Command
-- 
cgit v1.2.3