diff options
Diffstat (limited to 'src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala')
-rw-r--r-- | src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala new file mode 100644 index 0000000000..32d7502cda --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala @@ -0,0 +1,87 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing +package combinator +package lexical + +import token._ +import input.CharArrayReader.EofCh +import scala.collection.mutable + +/** This component provides a standard lexical parser for a simple, + * [[http://scala-lang.org Scala]]-like language. It parses keywords and + * identifiers, numeric literals (integers), strings, and delimiters. + * + * To distinguish between identifiers and keywords, it uses a set of + * reserved identifiers: every string contained in `reserved` is returned + * as a keyword token. (Note that `=>` is hard-coded as a keyword.) + * Additionally, the kinds of delimiters can be specified by the + * `delimiters` set. + * + * Usually this component is used to break character-based input into + * bigger tokens, which are then passed to a token-parser (see + * [[scala.util.parsing.combinator.syntactical.TokenParsers]].) 
+ *
+ * @author Martin Odersky
+ * @author Iulian Dragos
+ * @author Adriaan Moors
+ */
class StdLexical extends Lexical with StdTokens {
  /** Parses a single token; see `token` in `Scanners`.
   *
   *  The alternatives are tried in order: an identifier or keyword, an integer
   *  literal, a single- or double-quoted string literal (which may not contain a
   *  newline), the end-of-input character, and finally one of the `delimiters`.
   */
  def token: Parser[Token] =
    ( identChar ~ rep( identChar | digit )              ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
    | digit ~ rep( digit )                              ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
    | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") }
    | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") }
    | EofCh                                             ^^^ EOF
    | '\'' ~> failure("unclosed string literal")
    | '\"' ~> failure("unclosed string literal")
    | delim
    | failure("illegal character")
    )

  /** Returns the legal identifier chars, except digits. */
  def identChar = letter | elem('_')

  /** Skips whitespace and comments; see `whitespace` in `Scanners`.
   *
   *  Recognizes whitespace characters, `/* ... */` block comments and `//` line
   *  comments. A `/*` that is never closed is reported as an error.
   */
  def whitespace: Parser[Any] = rep[Any](
      whitespaceChar
    | '/' ~ '*' ~ comment
    | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') )
      // Reached only when `comment` found no closing "*/". An ordinary `failure`
      // here would merely end the surrounding `rep`, silently dropping the message
      // and handing "/*" to `token`. Consume the remaining input and raise a
      // non-backtracking `err` so that "unclosed comment" is actually reported.
    | '/' ~ '*' ~ rep( chrExcept(EofCh) ) ~> err("unclosed comment")
    )

  /** Parses the remainder of a block comment, up to and including the first `*``/`.
   *
   *  Runs of characters other than `*` are consumed by `rep`, so the parser only
   *  recurses when it meets a `*` that is not followed by `/`. Recursing once per
   *  character (as a naive definition would) can overflow the stack on long comments.
   */
  protected def comment: Parser[Any] =
    rep( chrExcept(EofCh, '*') ) ~ '*' ~ ( elem('/') | comment ) ^^ { _ => ' ' }

  /** The set of reserved identifiers: these will be returned as `Keyword`s. */
  val reserved = new mutable.HashSet[String]

  /** The set of delimiters (ordering does not matter). */
  val delimiters = new mutable.HashSet[String]

  /** Classifies an identifier: a `Keyword` if it is in `reserved`, otherwise an `Identifier`. */
  protected def processIdent(name: String) =
    if (reserved contains name) Keyword(name) else Identifier(name)

  // Built lazily, and only once, from the contents of `delimiters` at the time of
  // first use; delimiters added afterwards are not picked up by this parser.
  private lazy val _delim: Parser[Token] = {
    // One parser per delimiter, yielding the delimiter as a `Keyword` token.
    def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { _ => Keyword(s) }

    // The delimiters are sorted lexicographically and then |'ed together in reverse
    // order (note the flipped `y | x`). A string always sorts after each of its proper
    // prefixes, so every delimiter D is tried before any delimiter that is a prefix
    // of D -- otherwise D could never be matched.
    delimiters.toList.sorted
      .map(parseDelim)
      .foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x)
  }

  /** The parser for delimiters, built from the `delimiters` set. */
  protected def delim: Parser[Token] = _delim
}