1 files changed, 87 insertions, 0 deletions
diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala
new file mode 100644
index 0000000000..32d7502cda
--- /dev/null
+++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala
@@ -0,0 +1,87 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2006-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+package util.parsing
+package combinator
+package lexical
+
+import token._
+import input.CharArrayReader.EofCh
+import scala.collection.mutable
+
+/** This component provides a standard lexical parser for a simple,
+ *  [[http://scala-lang.org Scala]]-like language. It parses keywords and
+ *  identifiers, numeric literals (integers), strings, and delimiters.
+ *
+ *  To distinguish between identifiers and keywords, it uses a set of
+ *  reserved identifiers:  every string contained in `reserved` is returned
+ *  as a keyword token. (Note that `=>` is hard-coded as a keyword.)
+ *  Additionally, the kinds of delimiters can be specified by the
+ *  `delimiters` set.
+ *
+ *  Usually this component is used to break character-based input into
+ *  bigger tokens, which are then passed to a token-parser (see
+ *  [[scala.util.parsing.combinator.syntactical.TokenParsers]]).
+ *
+ * @author Martin Odersky
+ * @author Iulian Dragos
+ * @author Adriaan Moors
+ */
+class StdLexical extends Lexical with StdTokens {
+  // Parses a single token; the alternatives below are tried in the order listed (see `token` in `Scanners`).
+  def token: Parser[Token] =
+    ( identChar ~ rep( identChar | digit )              ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
+    | digit ~ rep( digit )                              ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
+    | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") }
+    | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") }
+    | EofCh                                             ^^^ EOF
+    | '\'' ~> failure("unclosed string literal")
+    | '\"' ~> failure("unclosed string literal")
+    | delim
+    | failure("illegal character")
+    )
+
+  /** Returns the legal identifier chars, except digits. */
+  def identChar = letter | elem('_')
+
+  // Skips whitespace characters, block comments and `//` line comments (see `whitespace` in `Scanners`).
+  def whitespace: Parser[Any] = rep[Any](
+      whitespaceChar
+    | '/' ~ '*' ~ comment
+    | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') )
+    | '/' ~ '*' ~ failure("unclosed comment")
+    )
+
+  // Remainder of a block comment, up to and including the first closing `*` `/` pair. Runs of non-'*' characters
+  // are consumed by `rep`, so this method recurses once per '*' not followed by '/', not once per character.
+  protected def comment: Parser[Any] =
+    rep( chrExcept(EofCh, '*') ) ~ '*' ~ ( elem('/') | comment ) ^^ { _ => ' ' }
+
+  /** The set of reserved identifiers: these will be returned as `Keyword`s. */
+  val reserved = new mutable.HashSet[String]
+
+  /** The set of delimiters (ordering does not matter). */
+  val delimiters = new mutable.HashSet[String]
+
+  /** Classifies an identifier-shaped lexeme: `Keyword` if `reserved` contains it, otherwise `Identifier`. */
+  protected def processIdent(name: String) =
+    if (reserved contains name) Keyword(name) else Identifier(name)
+
+  // Built on first use from the contents of `delimiters` at that moment; later additions are not picked up.
+  private lazy val _delim: Parser[Token] = {
+    // Builds one alternation over all delimiters. They are sorted with the default String ordering and then
+    // tried in reverse of that order, so a delimiter D is always attempted before any delimiter that is a
+    // proper prefix of D (the prefix sorts first and would otherwise match and hide D).
+    def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { _ => Keyword(s) }
+    val ordered = delimiters.toList.sorted
+    (ordered map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x)
+  }
+  /** Parser for any one of the configured `delimiters`; a match is returned as a `Keyword` token. */
+  protected def delim: Parser[Token] = _delim
+}