diff options
Diffstat (limited to 'src')
3 files changed, 174 insertions, 14 deletions
diff --git a/src/library/scala/util/parsing/combinator/JavaTokenParsers.scala b/src/library/scala/util/parsing/combinator/JavaTokenParsers.scala index 584da92c68..bc71391bdb 100644 --- a/src/library/scala/util/parsing/combinator/JavaTokenParsers.scala +++ b/src/library/scala/util/parsing/combinator/JavaTokenParsers.scala @@ -9,15 +9,49 @@ package scala.util.parsing.combinator +/** `JavaTokenParsers` differs from [[scala.util.parsing.combinator.RegexParsers]] + * by adding the following definitions: + * + * - `ident` + * - `wholeNumber` + * - `decimalNumber` + * - `stringLiteral` + * - `floatingPointNumber` + */ trait JavaTokenParsers extends RegexParsers { + /** Anything starting with an ASCII alphabetic character or underscore, + * followed by zero or more repetitions of regex's `\w`. + */ def ident: Parser[String] = """[a-zA-Z_]\w*""".r + /** An integer, without sign or with a negative sign. */ def wholeNumber: Parser[String] = """-?\d+""".r + /** Number following one of these rules: + * + * - An integer. For example: `13` + * - An integer followed by a decimal point. For example: `3.` + * - An integer followed by a decimal point and fractional part. For example: `3.14` + * - A decimal point followed by a fractional part. For example: `.1` + */ def decimalNumber: Parser[String] = """(\d+(\.\d*)?|\d*\.\d+)""".r + /** Double quotes (`"`) enclosing a sequence of: + * + * - Any character except double quotes, control characters or backslash (`\`) + * - A backslash followed by a slash, another backslash, or one of the letters + * `b`, `f`, `n`, `r` or `t`. 
+ * - `\` followed by `u` followed by four hexadecimal digits + */ def stringLiteral: Parser[String] = ("\""+"""([^"\p{Cntrl}\\]|\\[\\/bfnrt]|\\u[a-fA-F0-9]{4})*"""+"\"").r + /** A number following the rules of `decimalNumber`, with the following + * optional additions: + * + * - Preceded by a negative sign + * - Followed by `e` or `E` and an optionally signed integer + * - Followed by `f`, `F`, `d` or `D` (after the above rule, if both are used) + */ def floatingPointNumber: Parser[String] = """-?(\d+(\.\d*)?|\d*\.\d+)([eE][+-]?\d+)?[fFdD]?""".r } diff --git a/src/library/scala/util/parsing/combinator/Parsers.scala b/src/library/scala/util/parsing/combinator/Parsers.scala index a7d3157fbd..1280ece013 100644 --- a/src/library/scala/util/parsing/combinator/Parsers.scala +++ b/src/library/scala/util/parsing/combinator/Parsers.scala @@ -17,31 +17,59 @@ import annotation.migration /** `Parsers` is a component that ''provides'' generic parser combinators. * - * It ''requires'' the type of the elements these parsers should parse + * There are two abstract members that must be defined in order to + * produce parsers: the type `Elem` and + * [[scala.util.parsing.combinator.Parsers.Parser]]. There are helper + * methods that produce concrete `Parser` implementations -- see ''primitive + * parser'' below. + * + * A `Parsers` may define multiple `Parser` instances, which are combined + * to produce the desired parser. + * + * The type of the elements these parsers should parse must be defined + * by declaring `Elem` * (each parser is polymorphic in the type of result it produces). * * There are two aspects to the result of a parser: * 1. success or failure * 1. the result. * - * A `Parser[T]` provides both kinds of information. + * A [[scala.util.parsing.combinator.Parsers.Parser]] produces both kinds of information, + * by returning a [[scala.util.parsing.combinator.Parsers.ParseResult]] when its `apply` + * method is called on an input. 
* * The term ''parser combinator'' refers to the fact that these parsers * are constructed from primitive parsers and composition operators, such - * as sequencing, alternation, optionality, repetition, lifting, and so on. + * as sequencing, alternation, optionality, repetition, lifting, and so on. For example, + * given `p1` and `p2` of type [[scala.util.parsing.combinator.Parsers.Parser]]: + * + * {{{ + * p1 ~ p2 // sequencing: must match p1 followed by p2 + * p1 | p2 // alternation: must match either p1 or p2, with preference given to p1 + * p1.? // optionality: may match p1 or not + * p1.* // repetition: matches any number of repetitions of p1 + * }}} + * + * These combinators are provided as methods on [[scala.util.parsing.combinator.Parsers.Parser]], + * or as methods taking one or more `Parsers` and returning a `Parser` provided in + * this class. * * A ''primitive parser'' is a parser that accepts or rejects a single * piece of input, based on a certain criterion, such as whether the * input... - * - is equal to some given object, - * - satisfies a certain predicate, - * - is in the domain of a given partial function, ... + * - is equal to some given object (see method `accept`), + * - satisfies a certain predicate (see method `acceptIf`), + * - is in the domain of a given partial function (see method `acceptMatch`) + * - or other conditions, by using one of the other methods available, or subclassing `Parser` + * + * Even more primitive parsers always produce the same result, irrespective of the input. See + * methods `success`, `err` and `failure` as examples. * - * Even more primitive parsers always produce the same result, irrespective of the input. + * @see [[scala.util.parsing.combinator.RegexParsers]] and other known subclasses for practical examples. 
* - * @author Martin Odersky - * @author Iulian Dragos - * @author Adriaan Moors + * @author Martin Odersky + * @author Iulian Dragos + * @author Adriaan Moors */ trait Parsers { /** the type of input elements the provided parsers consume (When consuming @@ -372,10 +400,14 @@ trait Parsers { * * ''From: G. Hutton. Higher-order functions for parsing. J. Funct. Program., 2(3):323--343, 1992.'' * - * @param fq a function that, given the result from this parser, returns - * the second parser to be applied - * @return a parser that succeeds if this parser succeeds (with result `x`) - * and if then `fq(x)` succeeds + * @example {{{ + * def perlRE = "m" ~> (".".r into (separator => """[^%s]*""".format(separator).r <~ separator)) + * }}} + * + * @param fq a function that, given the result from this parser, returns + * the second parser to be applied + * @return a parser that succeeds if this parser succeeds (with result `x`) + * and if then `fq(x)` succeeds */ def into[U](fq: T => Parser[U]): Parser[U] = flatMap(fq) @@ -484,16 +516,44 @@ trait Parsers { */ def accept[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = acceptMatch(expected, f) + /** A parser matching input elements that satisfy a given predicate. + * + * `acceptIf(p)(el => "Unexpected "+el)` succeeds if the input starts with an element `e` for which `p(e)` is true. + * + * @param err A function from the received element into an error message. + * @param p A predicate that determines which elements match. + * @return A parser for elements satisfying p(e). + */ def acceptIf(p: Elem => Boolean)(err: Elem => String): Parser[Elem] = Parser { in => if (p(in.first)) Success(in.first, in.rest) else Failure(err(in.first), in) } + /** The parser that matches an element in the domain of the partial function `f`. + * + * If `f` is defined on the first element in the input, `f` is applied + * to it to produce this parser's result. 
+ * + * Example: The parser `acceptMatch("name", {case Identifier(n) => Name(n)})` + * accepts an `Identifier(n)` and returns a `Name(n)` + * + * @param expected a description of the kind of element this parser expects (for error messages) + * @param f a partial function that determines when this parser is successful and what its output is + * @return A parser that succeeds if `f` is applicable to the first element of the input, + * applying `f` to it to produce the result. + */ def acceptMatch[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = Parser{ in => if (f.isDefinedAt(in.first)) Success(f(in.first), in.rest) else Failure(expected+" expected", in) } + /** A parser that matches only the given [[scala.collection.Iterable]] collection of elements `es`. + * + * `acceptSeq(es)` succeeds if the input subsequently provides the elements in the iterable `es`. + * + * @param es the list of expected elements + * @return a Parser that recognizes a specified list of elements + */ def acceptSeq[ES <% Iterable[Elem]](es: ES): Parser[List[Elem]] = es.foldRight[Parser[List[Elem]]](success(Nil)){(x, pxs) => accept(x) ~ pxs ^^ mkList} @@ -518,6 +578,10 @@ trait Parsers { */ def success[T](v: T) = Parser{ in => Success(v, in) } + /** A helper method that turns a `Parser` into one that will + * print debugging information to stdout before and after + * being applied. + */ def log[T](p: => Parser[T])(name: String): Parser[T] = Parser{ in => println("trying "+ name +" at "+ in) val r = p(in) @@ -755,7 +819,22 @@ trait Parsers { } } + /** Given a concatenation with a repetition (list), move the concatenated element into the list */ def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } + + /** A wrapper over sequence of matches. + * + * Given `p1: Parser[A]` and `p2: Parser[B]`, a parser composed with + * `p1 ~ p2` will have type `Parser[~[A, B]]`. The successful result + * of the parser can be extracted from this case class. 
+ * + * It also enables pattern matching, so something like this is possible: + * + * {{{ + * def concat(p1: Parser[String], p2: Parser[String]): Parser[String] = + * p1 ~ p2 ^^ { case a ~ b => a + b } + * }}} + */ case class ~[+a, +b](_1: a, _2: b) { override def toString = "("+ _1 +"~"+ _2 +")" } diff --git a/src/library/scala/util/parsing/combinator/RegexParsers.scala b/src/library/scala/util/parsing/combinator/RegexParsers.scala index 9186211e46..a06b9d59ce 100644 --- a/src/library/scala/util/parsing/combinator/RegexParsers.scala +++ b/src/library/scala/util/parsing/combinator/RegexParsers.scala @@ -14,6 +14,44 @@ import scala.util.matching.Regex import scala.util.parsing.input._ import scala.collection.immutable.PagedSeq +/** The ''most important'' differences between `RegexParsers` and + * [[scala.util.parsing.combinator.Parsers]] are: + * + * - `Elem` is defined to be [[scala.Char]] + * - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`, + * so that string literals can be used as parser combinators. + * - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`, + * so that regex expressions can be used as parser combinators. + * - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true, + * skip any whitespace before each parser is called. + * - Protected val `whiteSpace` returns a regex that identifies whitespace. 
+ * + * For example, this creates a very simple calculator receiving `String` input: + * + * {{{ + * object Calculator extends RegexParsers { + * def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble } + * def factor: Parser[Double] = number | "(" ~> expr <~ ")" + * def term : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ { + * case number ~ list => (number /: list) { + * case (x, "*" ~ y) => x * y + * case (x, "/" ~ y) => x / y + * } + * } + * def expr : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ { + * case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /: + * case (x, "+" ~ y) => x + y + * case (x, "-" ~ y) => x - y + * } + * } + * + * def apply(input: String): Double = parseAll(expr, input) match { + * case Success(result, _) => result + * case failure : NoSuccess => scala.sys.error(failure.msg) + * } + * } + * }}} + */ trait RegexParsers extends Parsers { type Elem = Char @@ -22,6 +60,15 @@ trait RegexParsers extends Parsers { def skipWhitespace = whiteSpace.toString.length > 0 + /** Method called to handle whitespace before parsers. + * + * It checks `skipWhitespace` and, if true, skips anything + * matching `whiteSpace` starting from the current offset. + * + * @param source The input being parsed. + * @param offset The offset into `source` from which to match. + * @return The offset to be used for the next parser. + */ protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = if (skipWhitespace) (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match { |