From 6b801413fdb089d5084c64485867b9de7187a869 Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Fri, 14 Sep 2007 15:51:48 +0000 Subject: new version of combinator parsers --- .../scala/util/parsing/combinator1/$tilde.scala | 18 + .../parsing/combinator1/ImplicitConversions.scala | 36 ++ .../scala/util/parsing/combinator1/Parsers.scala | 675 +++++++++++++++++++++ .../util/parsing/combinator1/lexical/Lexical.scala | 41 ++ .../parsing/combinator1/lexical/Scanners.scala | 75 +++ .../parsing/combinator1/lexical/StdLexical.scala | 90 +++ .../syntactical/StandardTokenParsers.scala | 24 + .../combinator1/syntactical/StdTokenParsers.scala | 44 ++ .../combinator1/syntactical/TokenParsers.scala | 64 ++ .../util/parsing/combinator1/testing/Tester.scala | 45 ++ 10 files changed, 1112 insertions(+) create mode 100644 src/library/scala/util/parsing/combinator1/$tilde.scala create mode 100644 src/library/scala/util/parsing/combinator1/ImplicitConversions.scala create mode 100644 src/library/scala/util/parsing/combinator1/Parsers.scala create mode 100644 src/library/scala/util/parsing/combinator1/lexical/Lexical.scala create mode 100644 src/library/scala/util/parsing/combinator1/lexical/Scanners.scala create mode 100644 src/library/scala/util/parsing/combinator1/lexical/StdLexical.scala create mode 100644 src/library/scala/util/parsing/combinator1/syntactical/StandardTokenParsers.scala create mode 100644 src/library/scala/util/parsing/combinator1/syntactical/StdTokenParsers.scala create mode 100644 src/library/scala/util/parsing/combinator1/syntactical/TokenParsers.scala create mode 100644 src/library/scala/util/parsing/combinator1/testing/Tester.scala diff --git a/src/library/scala/util/parsing/combinator1/$tilde.scala b/src/library/scala/util/parsing/combinator1/$tilde.scala new file mode 100644 index 0000000000..25694c1488 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/$tilde.scala @@ -0,0 +1,18 @@ +package scala.util.parsing.combinator1 + +// p ~ q ~ r ^^ {case a ~ b ~ c => } +case class ~[+a, +b](_1: a, _2: b) { + override def toString = "("+ _1 +" ~ "+ _2 +")" +} + +// shortcut for scala.util.parsing.combinator.~(_, _) -- just ~(_, _) resolves to unary_~ +object mkTilde { def apply[a, b](_1: a, _2: b) = scala.util.parsing.combinator.~(_1, _2) } + + + //def flatten[t, s <: (~[t, s] \/ t)](p: ~[t, s]): List[t] = p match { + // case hd ~ tl => hd :: flatten(tl) + // case a ~ b => List(a, b) + //} + //def flatten[t, s <: ~[t, s]](p: ~[t, s]): List[t] = p._1 :: flatten(p) + //def flatten[t](p: ~[t, t]): List[t] = List(p._1, p._2) + diff --git a/src/library/scala/util/parsing/combinator1/ImplicitConversions.scala b/src/library/scala/util/parsing/combinator1/ImplicitConversions.scala new file mode 100644 index 0000000000..dc21fa0b87 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/ImplicitConversions.scala @@ -0,0 +1,36 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.util.parsing.combinator1 + +/** This object contains implicit conversions that come in handy when using the `^^' combinator + * {@see Parsers} to construct an AST from the concrete syntax. + *

+ * The reason for this is that the sequential composition combinator (`~') combines its constituents + * into a ~. When several `~'s are combined, this results in nested `~'s (to the left). + * The `flatten*' coercions makes it easy to apply an `n'-argument function to a nested ~ of + * depth (`n-1')

+ *

+ * The `headOptionTailToFunList' converts a function that takes a List[A] to a function that + * accepts a ~[A, Option[List[A]]] (this happens when, e.g., parsing something of the following + * shape: p ~ opt("." ~ repsep(p, ".")) -- where `p' is a parser that yields an A)

+ * + * @author Martin Odersky, Iulian Dragos, Adriaan Moors + */ +trait ImplicitConversions { self: Parsers => + implicit def flatten2[A, B, C] (f: (A, B) => C) = + (p: ~[A, B]) => p match {case a ~ b => f(a, b)} + implicit def flatten3[A, B, C, D] (f: (A, B, C) => D) = + (p: ~[~[A, B], C]) => p match {case a ~ b ~ c => f(a, b, c)} + implicit def flatten4[A, B, C, D, E] (f: (A, B, C, D) => E) = + (p: ~[~[~[A, B], C], D]) => p match {case a ~ b ~ c ~ d => f(a, b, c, d)} + implicit def flatten5[A, B, C, D, E, F](f: (A, B, C, D, E) => F) = + (p: ~[~[~[~[A, B], C], D], E]) => p match {case a ~ b ~ c ~ d ~ e=> f(a, b, c, d, e)} + implicit def headOptionTailToFunList[A, T] (f: List[A] => T)= + (p: ~[A, Option[List[A]]]) => f(p._1 :: (p._2 match { case Some(xs) => xs case None => Nil})) +} diff --git a/src/library/scala/util/parsing/combinator1/Parsers.scala b/src/library/scala/util/parsing/combinator1/Parsers.scala new file mode 100644 index 0000000000..872ec97009 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/Parsers.scala @@ -0,0 +1,675 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id: Parsers.scala 12357 2007-07-18 21:55:08Z moors $ + +package scala.util.parsing.combinator1 + +import scala.util.parsing.input._ +import scala.collection.mutable.{Map=>MutableMap} + +// TODO: better error handling (labelling like parsec's ) +// TODO: memoisation (like packrat parsers?) + +/**

+ * Parsers is a component that provides generic + * parser combinators. + *

+ *

+ * It requires the type of the elements these parsers should parse + * (each parser is polymorphic in the type of result it produces). + *

+ *

+ * There are two aspects to the result of a parser: (1) success or failure, + * and (2) the result. A Parser[T] provides both kinds of + * information. + *

+ *

+ * The term ``parser combinator'' refers to the fact that these parsers + * are constructed from primitive parsers and composition operators, such + * as sequencing, alternation, optionality, repetition, lifting, and so on. + *

+ *

+ * A ``primitive parser'' is a parser that accepts or rejects a single + * piece of input, based on a certain criterion, such as whether the + * input... + *

+ *

+ * Even more primitive parsers always produce the same result, irrespective + * of the input. + *

+ * + * @requires Elem the type of elements the provided parsers consume + * (When consuming invidual characters, a parser is typically called a ``scanner'', + * which produces ``tokens'' that are consumed by what is normally called a ``parser''. + * Nonetheless, the same principles apply, regardless of the input type.)

+ *

+ * @provides Input = Reader[Elem] + * The type of input the parsers in this component expect.

+ *

+ * @provides Parser[+T] extends (Input => ParseResult[T]) + * Essentially, a `Parser[T]' is a function from `Input' to `ParseResult[T]'.

+ *

+ * @provides ParseResult[+T] is like an `Option[T]', in the sense that it is either + * `Success[T]', which consists of some result (:T) (and the rest of the input) or + * `Failure[T]', which provides an error message (and the rest of the input).

+ * + * @author Martin Odersky, Iulian Dragos, Adriaan Moors + */ +trait Parsers { + /** the type of input elements */ + type Elem + + /** The parser input is an abstract reader of input elements */ + type Input = Reader[Elem] + + /** A base class for parser results. + * A result is either successful or not (failure may be fatal, i.e., + * an Error, or not, i.e., a Failure) + * On success, provides a result of type T. + */ + sealed abstract class ParseResult[+T] { + /** Functional composition of ParseResults + * + * @param `f' the function to be lifted over this result + * @return `f' applied to the result of this `ParseResult', packaged up as a new `ParseResult' + */ + def map[U](f: T => U): ParseResult[U] + + /** Partial functional composition of ParseResults + * + * @param `f' the partial function to be lifted over this result + * @param error a function that takes the same argument as `f' and produces an error message + * to explain why `f' wasn't applicable (it is called when this is the case) + * @return if `f' f is defined at the result in this `ParseResult', + * `f' applied to the result of this `ParseResult', packaged up as a new `ParseResult'. + * If `f' is not defined, `Failure'. + */ + def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U] + + def isEmpty = !successful + + /** Returns the embedded result */ + def get: T + + def getOrElse[B >: T](default: => B): B = + if (isEmpty) default else this.get + + val next: Input + + val successful: Boolean + } + + /** The success case of ParseResult: contains the result and the remaining input. + * + * @param result The parser's output + * @param next The parser's remaining input + */ + case class Success[+T](result: T, override val next: Input) extends ParseResult[T] { + def map[U](f: T => U) = Success(f(result), next) + def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U] + = if(f.isDefinedAt(result)) Success(f(result), next) + else Failure(error(result), next) + + def get: T = result + + /** The toString method of a Success */ + override def toString = "["+next.pos+"] parsed: "+result + + val successful = true + } + + /** A common super-class for unsuccessful parse results + */ + sealed abstract class NoSuccess(val msg: String, override val next: Input) extends ParseResult[Nothing] { // when we don't care about the difference between Failure and Error + val successful = false + + def map[U](f: Nothing => U) = this + def mapPartial[U](f: PartialFunction[Nothing, U], error: Nothing => String): ParseResult[U] = this + + def get: Nothing = error("No result when parsing failed") + } + + /** The failure case of ParseResult: contains an error-message and the remaining input. + * Parsing will back-track when a failure occurs. + * + * @param msg An error message string describing the failure. + * @param next The parser's unconsumed input at the point where the failure occurred. + */ + case class Failure(override val msg: String, override val next: Input) extends NoSuccess(msg, next) { + /** The toString method of a Failure yields an error message */ + override def toString = "["+next.pos+"] failure: "+msg+"\n\n"+next.pos.longString + } + + /** The fatal failure case of ParseResult: contains an error-message and the remaining input. + * No back-tracking is done when a parser returns an `Error' + * + * @param msg An error message string describing the error. + * @param next The parser's unconsumed input at the point where the error occurred. + */ + case class Error(override val msg: String, override val next: Input) extends NoSuccess(msg, next) { + /** The toString method of an Error yields an error message */ + override def toString = "["+next.pos+"] error: "+msg+"\n\n"+next.pos.longString + } + + /** The root class of parsers. + * Parsers are functions from the Input type to ParseResult + */ + abstract class Parser[+T] extends (Input => ParseResult[T]) { + /** An unspecified method that defines the behaviour of this parser. + */ + def apply(in: Input): ParseResult[T] + + + // the operator formerly known as +++, ++, &, but now, behold the venerable ~ + // it's short, light (looks like whitespace), has few overloaded meaning (thanks to the recent change from ~ to unary_~) + // and we love it! + + /** A parser combinator for sequential composition + * + *

`p ~ q' succeeds if `p' succeeds and `q' succeeds on the input + * left over by `p'.

+ * + * @param q a parser that will be executed after `p' (this parser) succeeds + * @return a `Parser' that -- on success -- returns a `~' (like a Pair, but easier to pattern match on) + * that contains the result of `p' and that of `q'. + * The resulting parser fails if either `p' or `q' fails. + */ + def ~ [U](q: => Parser[U]): Parser[~[T, U]] = new Parser[~[T, U]] { + def apply(in: Input) = seq(Parser.this, q)((x, y) => new ~(x,y))(in) + override def toString = "~" + } + + def ~> [U](q: => Parser[U]): Parser[U] = new Parser[U] { + def apply(in: Input) = seq(Parser.this, q)((x, y) => y)(in) + override def toString = "~" + } + + def <~ [U](q: => Parser[U]): Parser[T] = new Parser[T] { + def apply(in: Input) = seq(Parser.this, q)((x, y) => x)(in) + override def toString = "~" + } + + /* not really useful: V cannot be inferred because Parser is covariant in first type parameter (V is always trivially Nothing) + def ~~ [U, V](q: => Parser[U])(implicit combine: (T, U) => V): Parser[V] = new Parser[V] { + def apply(in: Input) = seq(Parser.this, q)((x, y) => combine(x,y))(in) + } */ + + /** A parser combinator for non-back-tracking sequential composition + * + *

`p ~! q' succeeds if `p' succeeds and `q' succeeds on the input + * left over by `p'. In case of failure, no back-tracking is performed + * (in an earlier parser produced by the | combinator).

+ * + * @param q a parser that will be executed after `p' (this parser) succeeds + * @return a `Parser' that -- on success -- returns a `~' (like a Pair, but easier to pattern match on) + * that contains the result of `p' and that of `q'. + * The resulting parser fails if either `p' or `q' fails, this failure is fatal. + */ + def ~! [U](q: => Parser[U]): Parser[~[T, U]] = new Parser[~[T, U]] with OnceParser[~[T, U]] { + def apply(in: Input) = seq(Parser.this, commit(q))((x, y) => new ~(x,y))(in) + override def toString = "~!" + } + + /** A parser combinator for alternative composition + * + *

`p | q' succeeds if `p' succeeds or `q' succeeds + * Note that `q' is only tried if `p's failure is non-fatal (i.e., back-tracking is + * allowed).

+ * + * @param q a parser that will be executed if `p' (this parser) fails (and allows back-tracking) + * @return a `Parser' that returns the result of the first parser to succeed (out of `p' and `q') + * The resulting parser succeeds if (and only if) + */ + def | [U >: T](q: => Parser[U]): Parser[U] = new Parser[U] { + def apply(in: Input) = Parser.this(in) match { + case s1 @ Success(_, _) => s1 + case e1 @ Error(_, _) => e1 + case f1 @ Failure(_, next1) => q(in) match { + case s2 @ Success(_, _) => s2 + case f2 @ Failure(_, next2) => if (next2.pos < next1.pos) f1 else f2 + case e2 @ Error(_, next2) => if (next2.pos < next1.pos) f1 else e2 + } + } + override def toString = "|" + } + + /** A parser combinator for function application + * + *

`p ^^ f' succeeds if `p' succeeds; it returns `f' applied to the result of `p'.

+ * + * @param f a function that will be applied to this parser's result (see `map' in `ParseResult'). + * @return a parser that has the same behaviour as the current parser, but whose result is + * transformed by `f'. + */ + def ^^ [U](f: T => U): Parser[U] = new Parser[U] { + def apply(in: Input) = Parser.this(in).map(f) + override def toString = Parser.this.toString+"^^" + } + + def ^^^ [U](r: U): Parser[U] = ^^ (x => r) + + /** A parser combinator for partial function application + * + *

`p ^? (f, error)' succeeds if `p' succeeds AND `f' is defined at the result of `p'; + * in that case, it returns `f' applied to the result of `p'. If `f' is not applicable, + * error(the result of `p') should explain why.

+ * + * @param f a partial function that will be applied to this parser's result + * (see `mapPartial' in `ParseResult'). + * @param error a function that takes the same argument as `f' and produces an error message + * to explain why `f' wasn't applicable + * @return a parser that succeeds if the current parser succeeds and `f' is applicable + * to the result. If so, the result will be transformed by `f'. + */ + def ^? [U](f: PartialFunction[T, U], error: T => String): Parser[U] = new Parser[U] { + def apply(in: Input) = Parser.this(in).mapPartial(f, error) + override def toString = Parser.this.toString+"^?" + } + + /** A parser combinator for partial function application + * + *

`p ^? f' succeeds if `p' succeeds AND `f' is defined at the result of `p'; + * in that case, it returns `f' applied to the result of `p'.

+ * + * @param f a partial function that will be applied to this parser's result + * (see `mapPartial' in `ParseResult'). + * @return a parser that succeeds if the current parser succeeds and `f' is applicable + * to the result. If so, the result will be transformed by `f'. + */ + def ^? [U](f: PartialFunction[T, U]): Parser[U] = new Parser[U] { + def apply(in: Input) = Parser.this(in).mapPartial(f, result => "Constructor function not defined at "+result) + override def toString = Parser.this.toString+"^?" + } + + + /** A parser combinator that parameterises a subsequent parser with the result of this one + * + *

+ * Use this combinator when a parser depends on the result of a previous parser. `p' should be + * a function that takes the result from the first parser and returns the second parser.

+ * + *

`p into fq' (with `fq' typically `{x => q}') first applies `p', and then, if `p' successfully + * returned result `r', applies `fq(r)' to the rest of the input.

+ * + *

From: G. Hutton. Higher-order functions for parsing. J. Funct. Program., 2(3):323--343, 1992.

+ * + * @param fq a function that, given the result from this parser, returns the second parser to be applied + * @return a parser that succeeds if this parser succeeds (with result `x') and if then `fq(x)' succeeds + */ + def into[U](fq: T => Parser[U]): Parser[U] = new Parser[U] { + def apply(in: Input) = Parser.this(in) match { + case Success(result, next) => fq(result)(next) + case ns: NoSuccess => ns + } + } + + // shortcuts for combinators: + + /** Returns into(fq) */ + def >>[U](fq: T => Parser[U])=into(fq) + + + /** Returns a parser that repeatedly parses what this parser parses + * + * @return rep(this) + */ + def * = rep(this) + + /** Returns a parser that repeatedly parses what this parser parses, interleaved with the `sep' parser. + * The `sep' parser specifies how the results parsed by this parser should be combined. + * + * @return chainl1(this, sep) + */ + def *[U >: T](sep: => Parser[(U, U) => U]) = chainl1(this, sep) + + // TODO: improve precedence? a ~ b*(",") = a ~ (b*(",")) should be true + + /** Returns a parser that repeatedly (at least once) parses what this parser parses. + * + * @return rep1(this) + */ + def + = rep1(this) + + /** Returns a parser that optionally parses what this parser parses. + * + * @return opt(this) + */ + def ? = opt(this) + } + + // TODO: can this implemented in ParseResult, like map? + /** A helper method for sequential composition of (unit-)parsers + */ + private def seq[T, U, V](p: => Input => ParseResult[T], q: => Input => ParseResult[U]) + (compose: (T, U) => V) + (in: Input): ParseResult[V] + = p(in) match { + case Success(x, next1) => q(next1) match { + case Success(y, next2) => Success(compose(x, y), next2) + case ns: NoSuccess => ns + } + case ns: NoSuccess => ns + } + + /** Wrap a parser so that its failures become errors (the | combinator will give up as soon as + * it encounters an error, on failure it simply tries the next alternative) + */ + def commit[T](p: => Parser[T]) = new Parser[T] { + def apply(in: Input) = p(in) match{ + case s @ Success(_, _) => s + case e @ Error(_, _) => e + case f @ Failure(msg, next) => Error(msg, next) + } + } + + /*trait ElemFun + case class EFCons(hd: Elem => ElemFun, tl: ElemFun) extends ElemFun + case class EFNil(res: Boolean) extends ElemFun*/ + + + /** A parser matching input elements that satisfy a given predicate + * + *

elem(kind, p) succeeds if the input starts with an element `e' for which p(e) is true.

+ * + * @param kind The element kind, used for error messages + * @param p A predicate that determines which elements match. + * @return + */ + def elem(kind: String, p: Elem => Boolean) = new Parser[Elem] { + def apply(in: Input) = + if (p(in.first)) Success(in.first, in.rest) + else Failure(kind+" expected", in) + } + + /** A parser that matches only the given element `e' + * + *

elem(e) succeeds if the input starts with an element `e'

+ * + * @param e the `Elem' that must be the next piece of input for the returned parser to succeed + * @return a `Parser' that succeeds if `e' is the next available input (and returns it). + */ + def elem(e: Elem): Parser[Elem] = new Parser[Elem] { + def apply(in: Input) = + if (in.first == e) Success(e, in.rest) + else Failure("`"+e+"' expected but " + in.first + " found", in) + } + + + /** A parser that matches only the given element `e' + *

+ * The method is implicit so that elements can automatically be lifted to their parsers. + * For example, when parsing `Token's, Identifier("new") (which is a `Token') can be used directly, + * instead of first creating a `Parser' using accept(Identifier("new")).

+ * + * @param e the `Elem' that must be the next piece of input for the returned parser to succeed + * @return a `tParser' that succeeds if `e' is the next available input. + */ + implicit def accept(e: Elem): Parser[Elem] = new Parser[Elem] { + def apply(in: Input) = + if (in.first == e) Success(e, in.rest) + else Failure("`"+e+"' expected but " + in.first + " found", in) + } + + /** A parser that matches only the given list of element `es' + * + *

accept(es) succeeds if the input subsequently provides the elements in the list `es'.

+ * + * @param es the list of expected elements + * @return a Parser that recognizes a specified list of elements + */ + def accept[ES <% List[Elem]](es: ES) = new Parser[List[Elem]] { + def apply(in0: Input) = { + var these: List[Elem] = es + var in = in0 + + while(!these.isEmpty && in.first == these.head) { + these = these.tail + in = in.rest + } + + if (these.isEmpty) Success(es: List[Elem], in) + else Failure("Expected: '"+these.head+"', found: '"+in.first+"'", in0) + } + } + + /** The parser that matches an element in the domain of the partial function `f' + *

+ * If `f' is defined on the first element in the input, `f' is applied to it to produce + * this parser's result.

+ *

+ * Example: The parser accept("name", {case Identifier(n) => Name(n)}) + * accepts an Identifier(n) and returns a Name(n).

+ * + * @param expected a description of the kind of element this parser expects (for error messages) + * @param f a partial function that determines when this parser is successful and what its output is + * @return A parser that succeeds if `f' is applicable to the first element of the input, + * applying `f' to it to produce the result. + */ + def accept[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = new Parser[U] { + def apply(in: Input) = + if (f.isDefinedAt(in.first)) Success(f(in.first), in.rest) + else Failure(expected+" expected", in) + } + + /** A parser that always fails + * + * @param msg The error message describing the failure. + * @return A parser that always fails with the specified error message. + */ + def failure(msg: String) = new Parser[Nothing] { + def apply(in: Input) = Failure(msg, in) + } + + /** A parser that results in an error + * + * @param msg The error message describing the failure. + * @return A parser that always fails with the specified error message. + */ + def err(msg: String) = new Parser[Nothing] { + def apply(in: Input) = Error(msg, in) + } + + /** A parser that always succeeds + * + * @param v The result for the parser + * @return A parser that always succeeds, with the given result `v' + */ + def success[T](v: T) = new Parser[T] { + def apply(in: Input) = Success(v, in) + } + + def log[T](p: => Parser[T])(name: String): Parser[T] = new Parser[T] { + def apply(in: Input) = {println("trying "+name+" at "+in.pos); val r = p(in); println(name+" --> "+r); r } + } + + /** A parser generator for repetitions. + * + *

rep(p) repeatedly uses `p' to parse the input until `p' fails (the result is a List + * of the consecutive results of `p')

+ * + * @param p a `Parser' that is to be applied successively to the input + * @return A parser that returns a list of results produced by repeatedly applying `p' to the input. + */ + def rep[T](p: => Parser[T]): Parser[List[T]] = rep1(p) | success(List()) + + /** A parser generator for interleaved repetitions. + * + *

repsep(p, q) repeatedly uses `p' interleaved with `q' to parse the input, until `p' fails. + * (The result is a `List' of the results of `p'.)

+ * + *

Example: repsep(term, ",") parses a comma-separated list of term's, + * yielding a list of these terms

+ * + * @param p a `Parser' that is to be applied successively to the input + * @param q a `Parser' that parses the elements that separate the elements parsed by `p' + * @return A parser that returns a list of results produced by repeatedly applying `p' (interleaved + * with `q') to the input. + * The results of `p' are collected in a list. The results of `q' are discarded. + */ + def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = + rep1sep(p, q) | success(List()) + + /** A parser generator for non-empty repetitions. + * + *

rep1(p) repeatedly uses `p' to parse the input until `p' fails -- `p' must succeed at least + * once (the result is a `List' of the consecutive results of `p')

+ * + * @param p a `Parser' that is to be applied successively to the input + * @return A parser that returns a list of results produced by repeatedly applying `p' to the input + * (and that only succeeds if `p' matches at least once). + */ + def rep1[T](p: => Parser[T]): Parser[List[T]] = rep1(p, p) + + /** A parser generator for non-empty repetitions. + * + *

rep1(f, p) first uses `f' (which must succeed) and then repeatedly uses `p' to + * parse the input until `p' fails + * (the result is a `List' of the consecutive results of `f' and `p')

+ * + * @param first a `Parser' that parses the first piece of input + * @param p a `Parser' that is to be applied successively to the rest of the input (if any) + * @return A parser that returns a list of results produced by first applying `f' and then + * repeatedly `p' to the input (it only succeeds if `f' matches). + */ + def rep1[T](first: => Parser[T], p: => Parser[T]): Parser[List[T]] = first ~ rep(p) ^^ { case ~(x, xs) => x :: xs } + + /* new Parser[List[T]] { + def apply(in0: Input) = { + val xs = new scala.collection.mutable.ListBuffer[T] + var in = in0 + + var res = first(in) + + while(res.successful) { + xs += res.get + in = res.next + res = p(in) + } + + if (!xs.isEmpty) Success(xs.toList, res.next) + else Failure("TODO", TODO) + } + }*/ + + /** A parser generator for a specified number of repetitions. + * + *

repN(n, p) uses `p' exactly `n' time to parse the input + * (the result is a `List' of the `n' consecutive results of `p')

+ * + * @param p a `Parser' that is to be applied successively to the input + * @param n the exact number of times `p' must succeed + * @return A parser that returns a list of results produced by repeatedly applying `p' to the input + * (and that only succeeds if `p' matches exactly `n' times). + */ + def repN[T](n: Int, p: => Parser[T]): Parser[List[T]] = + if(n==0) success(Nil) else p ~ repN(n-1, p) ^^ { case ~(x, xs) => x :: xs } + + /** A parser generator for non-empty repetitions. + * + *

rep1sep(first, p, q) starts by using `first', followed by repeatedly uses of `p' interleaved with `q' + * to parse the input, until `p' fails. `first' must succeed (the result is a `List' of the + * consecutive results of `first' and `p')

+ * + * @param first a `Parser' that is to be applied to the first element of input + * @param p a `Parser' that is to be applied successively to the input + * @param q a `Parser' that parses the elements that separate the elements parsed by `p' + * (interleaved with `q') + * @return A parser that returns a list of results produced by repeatedly applying `p' to the input + * (and that only succeeds if `p' matches at least once). + * The results of `p' are collected in a list. The results of `q' are discarded. + */ + def rep1sep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = + p ~ (q ~ rep1sep(p, q) ^^ { case x ~ y => y } | success(List())) ^^ { case x ~ y => x :: y } + + /** A parser generator that, roughly, generalises the rep1sep generator so that `q', which parses the separator, + * produces a left-associative function that combines the elements it separates. + * + *

From: J. Fokker. Functional parsers. In J. Jeuring and E. Meijer, editors, Advanced Functional Programming, volume 925 of Lecture Notes in Computer Science, pages 1--23. Springer, 1995.

+ * + * @param p a parser that parses the elements + * @param q a parser that parses the token(s) separating the elements, yielding a left-associative function that + * combines two elements into one + */ + def chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T] = + p ~ rep(q ~ p) ^^ {case x ~ xs => xs.foldLeft(x)((a, f_b) => f_b._1(a, f_b._2))} // ((a, {f, b}) => f(a, b)) + + /** A parser generator that, roughly, generalises the rep1sep generator so that `q', which parses the separator, + * produces a left-associative function that combines the elements it separates. + * + * @param first a parser that parses the first element + * @param p a parser that parses the subsequent elements + * @param q a parser that parses the token(s) separating the elements, yielding a left-associative function that + * combines two elements into one + */ + def chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T] = + first ~ rep(q ~ p) ^^ {case x ~ xs => xs.foldLeft(x)((a, f_b) => f_b._1(a, f_b._2))} // ((a, {f, b}) => f(a, b)) + + /** A parser generator that generalises the rep1sep generator so that `q', which parses the separator, + * produces a right-associative function that combines the elements it separates. Additionally, + * The right-most (last) element and the left-most combinating function have to be supplied. + * + * rep1sep(p: Parser[T], q) corresponds to chainr1(p, q ^^ cons, cons, Nil) (where val cons = (x: T, y: List[T]) => x :: y) + * + * @param p a parser that parses the elements + * @param q a parser that parses the token(s) separating the elements, yielding a right-associative function that + * combines two elements into one + * @param combine the "last" (left-most) combination function to be applied + * @param first the "first" (right-most) element to be combined + */ + def chainr1[T, U](p: Parser[T], q: Parser[(T, U) => U], combine: (T, U) => U, first: U): Parser[U] = + p ~ rep(q ~ p) ^^ {case x ~ xs => + (new ~(combine, x) :: xs).foldRight(first)((f_a, b) => f_a._1(f_a._2, b))} // (({f, a}, b) => f(a, b)) + + /** A parser generator for optional sub-phrases. + * + *

opt(p) is a parser that returns `Some(x)' if `p' returns `x' and `None' if `p' fails

+ * + * @param p A `Parser' that is tried on the input + * @return a `Parser' that always succeeds: either with the result provided by `p' or + * with the empty result + */ + def opt[T](p: => Parser[T]): Parser[Option[T]] = + p ^^ (x => Some(x)) | success(None) + + /** `positioned' decorates a parser's result with the start position of the input it consumed. + * + * @param p a `Parser' whose result conforms to `Positional'. + * @return A parser that has the same behaviour as `p', but which marks its result with the + * start position of the input it consumed, if it didn't already have a position. + */ + def positioned[T <: Positional](p: => Parser[T]): Parser[T] = new Parser[T] { + def apply(in: Input) = p(in) match { + case Success(t, in1) => Success(if (t.pos == NoPosition) t setPos in.pos else t, in1) + case ns: NoSuccess => ns + } + } + + case class ~[+a, +b](_1: a, _2: b) { + override def toString = "("+ _1 +" ~ "+ _2 +")" + } + + /** A function that always returns a given constant; useful as right-hand argument of ^^ + */ + + def const[T, U] (result: U): T => U = x => result + + /** A parser whose ~ combinator disallows back-tracking. + */ + trait OnceParser[+T] extends Parser[T] { + override def ~ [U](q: => Parser[U]): Parser[~[T, U]] = new Parser[~[T, U]] with OnceParser[~[T, U]] { + def apply(in: Input) = seq(OnceParser.this, commit(q))((x, y) => new ~(x, y))(in) + } + } +} diff --git a/src/library/scala/util/parsing/combinator1/lexical/Lexical.scala b/src/library/scala/util/parsing/combinator1/lexical/Lexical.scala new file mode 100644 index 0000000000..31ecdc629d --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/lexical/Lexical.scala @@ -0,0 +1,41 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id: Lexical.scala 12268 2007-07-11 13:45:53Z michelou $ + + +package scala.util.parsing.combinator1.lexical + +import scala.util.parsing.syntax._ +import scala.util.parsing.input.CharArrayReader.EofCh + +/**

+ * This component complements the Scanners component with + * common operations for lexical parsers. + *

+ *

+ * {@see StdLexical} for a concrete implementation for a simple, Scala-like + * language. + *

+ * + * @author Martin Odersky, Adriaan Moors + */ +abstract class Lexical extends Scanners with Tokens { + + /** A character-parser that matches a letter (and returns it)*/ + def letter = elem("letter", _.isLetter) + + /** A character-parser that matches a digit (and returns it)*/ + def digit = elem("digit", _.isDigit) + + /** A character-parser that matches any character except the ones given in `cs' (and returns it)*/ + def chrExcept(cs: Char*) = elem("", ch => (cs forall (ch !=))) + + /** A character-parser that matches a white-space character (and returns it)*/ + def whitespaceChar = elem("space char", ch => ch <= ' ' && ch != EofCh) +} diff --git a/src/library/scala/util/parsing/combinator1/lexical/Scanners.scala b/src/library/scala/util/parsing/combinator1/lexical/Scanners.scala new file mode 100644 index 0000000000..af5c141267 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/lexical/Scanners.scala @@ -0,0 +1,75 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id: Scanners.scala 12407 2007-07-25 02:33:18Z emir $ + + +package scala.util.parsing.combinator1.lexical + +import scala.util.parsing.syntax._ +import scala.util.parsing.input._ + +/**

+ * This component provides core functionality for lexical parsers. + *

+ *

+ * See its subclasses {@see Lexical} and -- most interestingly + * {@see StdLexical}, for more functionality. + *

+ * + * @requires token a parser that produces a token (from a stream of characters) + * @requires whitespace a unit-parser for white-space + * @provides Scanner essentially a parser that parses a stream of characters + * to produce `Token's, which are typically passed to a + * syntactical parser (which operates on `Token's, not on + * individual characters). + * + * @author Martin Odersky, Adriaan Moors + */ +trait Scanners extends Parsers { + type Elem = Char + type Token + + /** This token is produced by a scanner {@see Scanner} when scanning failed. */ + def errorToken(msg: String): Token + + /** a parser that produces a token (from a stream of characters) */ + def token: Parser[Token] + + /** a parser for white-space -- its result will be discarded */ + def whitespace: Parser[Any] + + /**

+ * Scanner is essentially(*) a parser that produces `Token's + * from a stream of characters. The tokens it produces are typically + * passed to parsers in TokenParsers. + *

+ *

+ * Note: (*) Scanner is really a `Reader' of `Token's + *

+ */ + class Scanner(in: Reader[Char]) extends Reader[Token] { + /** Convenience constructor (makes a character reader out of the given string) */ + def this(in: String) = this(new CharArrayReader(in.toCharArray())) + private val (tok, rest1, rest2) = whitespace(in) match { + case Success(_, in1) => + token(in1) match { + case Success(tok, in2) => (tok, in1, in2) + case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) + } + case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) + } + private def skip(in: Reader[Char]) = if (in.atEnd) in else in.rest + + def first = tok + def rest = new Scanner(rest2) + def pos = rest1.pos + def atEnd = in.atEnd || (whitespace(in) match { case Success(_, in1) => in1.atEnd case _ => false }) + } +} + diff --git a/src/library/scala/util/parsing/combinator1/lexical/StdLexical.scala b/src/library/scala/util/parsing/combinator1/lexical/StdLexical.scala new file mode 100644 index 0000000000..fc04116692 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/lexical/StdLexical.scala @@ -0,0 +1,90 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id: StdLexical.scala 12242 2007-07-09 09:43:09Z michelou $ + + +package scala.util.parsing.combinator1.lexical + +import scala.util.parsing.syntax._ +import scala.util.parsing.input.CharArrayReader.EofCh +import collection.mutable.HashSet + +/**

+ * This component provides a standard lexical parser for a simple, Scala-like language. + * It parses keywords and identifiers, numeric literals (integers), strings, and delimiters. + *

+ *

+ * To distinguish between identifiers and keywords, it uses a set of reserved identifiers: + * every string contained in `reserved' is returned as a keyword token. + * (Note that "=>" is hard-coded as a keyword.) + * Additionally, the kinds of delimiters can be specified by the `delimiters' set. + *

+ *

+ * Usually this component is used to break character-based input into bigger tokens, + * which are then passed to a token-parser {@see TokenParsers}. + *

+ * + * @author Martin Odersky, Iulian Dragos, Adriaan Moors + */ +class StdLexical extends Lexical with StdTokens { + // see `token' in `Scanners' + def token: Parser[Token] = + ( letter ~ rep( letter | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") } + | digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") } + | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case _ ~ chars ~ _ => StringLit(chars mkString "") } + | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case _ ~ chars ~ _ => StringLit(chars mkString "") } + | EofCh ^^ const(EOF) + | '\'' ~> failure("unclosed string literal") + | '\"' ~> failure("unclosed string literal") + | delim + | failure("illegal character") + ) + + // see `whitespace in `Scanners' + def whitespace: Parser[Any] = rep( + whitespaceChar + | '/' ~ '*' ~ comment + | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') ) + | '/' ~ '*' ~ failure("unclosed comment") + ) + + protected def comment: Parser[Any] = ( + '*' ~ '/' ^^ { case _ => ' ' } + | chrExcept(EofCh) ~ comment + ) + + /** The set of reserved identifiers: these will be returned as `Keyword's */ + val reserved = new HashSet[String] + + /** The set of delimiters (ordering does not matter) */ + val delimiters = new HashSet[String] + + protected def processIdent(name: String) = + if (reserved contains name) Keyword(name) else Identifier(name) + + private var _delim: Parser[Token] = null + protected def delim: Parser[Token] = { + if(_delim eq null) { // construct parser for delimiters by |'ing together the parsers for the individual delimiters, + // starting with the longest one (hence the sort + reverse) -- otherwise a delimiter D will never be matched if + // there is another delimiter that is a prefix of D + def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { x => Keyword(s) } + + val d = new Array[String](delimiters.size) + delimiters.copyToArray(d,0) + scala.util.Sorting.quickSort(d) + _delim = d.toList.reverse.map(parseDelim).reduceRight[Parser[Token]](_ | _) // no offence :-) + } + + _delim + } + + private def lift[T](f: String => T)(xs: List[Char]): T = f(xs.mkString("", "", "")) + + private def lift2[T](f: String => T)(p: ~[Char, List[Char]]): T = lift(f)(p._1 :: p._2) +} diff --git a/src/library/scala/util/parsing/combinator1/syntactical/StandardTokenParsers.scala b/src/library/scala/util/parsing/combinator1/syntactical/StandardTokenParsers.scala new file mode 100644 index 0000000000..68d0d07c39 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/syntactical/StandardTokenParsers.scala @@ -0,0 +1,24 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id: StdTokenParsers.scala 12242 2007-07-09 09:43:09Z michelou $ + + +package scala.util.parsing.combinator1.syntactical + +import scala.util.parsing.syntax._ +import scala.util.parsing.combinator1.lexical.StdLexical + +/** This component provides primitive parsers for the standard tokens defined in `StdTokens'. +* +* @author Martin Odersky, Adriaan Moors + */ +class StandardTokenParsers extends StdTokenParsers { + type Tokens = StdTokens + val lexical = new StdLexical +} diff --git a/src/library/scala/util/parsing/combinator1/syntactical/StdTokenParsers.scala b/src/library/scala/util/parsing/combinator1/syntactical/StdTokenParsers.scala new file mode 100644 index 0000000000..e344f988f8 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/syntactical/StdTokenParsers.scala @@ -0,0 +1,44 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id: StdTokenParsers.scala 12242 2007-07-09 09:43:09Z michelou $ + + +package scala.util.parsing.combinator1.syntactical + +import scala.util.parsing.syntax._ + +/** This component provides primitive parsers for the standard tokens defined in `StdTokens'. +* +* @author Martin Odersky, Adriaan Moors + */ +trait StdTokenParsers extends TokenParsers { + type Tokens <: StdTokens + import lexical.{Keyword, NumericLit, StringLit, Identifier} + + /** A parser which matches a single keyword token. + * + * @param chars The character string making up the matched keyword. + * @return a `Parser' that matches the given string + */ + implicit def keyword(chars: String): Parser[String] = accept(Keyword(chars)) ^^ (_.chars) + + /** A parser which matches a numeric literal */ + def numericLit: Parser[String] = + elem("number", _.isInstanceOf[NumericLit]) ^^ (_.chars) + + /** A parser which matches a string literal */ + def stringLit: Parser[String] = + elem("string literal", _.isInstanceOf[StringLit]) ^^ (_.chars) + + /** A parser which matches an identifier */ + def ident: Parser[String] = + elem("identifier", _.isInstanceOf[Identifier]) ^^ (_.chars) +} + + diff --git a/src/library/scala/util/parsing/combinator1/syntactical/TokenParsers.scala b/src/library/scala/util/parsing/combinator1/syntactical/TokenParsers.scala new file mode 100644 index 0000000000..d4f8a9dfc8 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/syntactical/TokenParsers.scala @@ -0,0 +1,64 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id: TokenParsers.scala 12242 2007-07-09 09:43:09Z michelou $ + + +package scala.util.parsing.combinator1.syntactical + +/**

+ * This is the core component for token-based parsers. + *

+ *

+ * @requires lexical a component providing the tokens consumed by the + * parsers in this component. + *

+ * + * @author Martin Odersky, Adriaan Moors + */ +trait TokenParsers extends Parsers { + /** Tokens is the abstract type of the `Token's consumed by the parsers in this component*/ + type Tokens <: scala.util.parsing.syntax.Tokens + + /** lexical is the component responsible for consuming some basic kind of + * input (usually character-based) and turning it into the tokens + * understood by these parsers. + */ + val lexical: Tokens + + /** The input-type for these parsers*/ + type Elem = lexical.Token + + /**

+ * A parser generator delimiting whole phrases (i.e. programs). + *

+ *

+ * phrase(p) succeeds if p succeeds and + * no input is left over after p. + *

+ * + * @param p the parser that must consume all input for the resulting parser + * to succeed. + * @return a parser that has the same result as `p', but that only succeeds + * if p consumed all the input. + */ + def phrase[t](p: Parser[t]) = new Parser[t] { + def apply(in: Input) = p(in) match { + case s @ Success(out, in1) if in1.atEnd => s + case s @ Success(out, in1) => Failure("end of input expected", in1) + case f @ Failure(_, in1) => in1.first match { + case lexical.ErrorToken(msg) => Failure(msg, in1) + case lexical.EOF => Failure("unexpected end of input", in1) + case t => Failure("unexpected token "+t, in1) + } + case f => f + } + } +} + + diff --git a/src/library/scala/util/parsing/combinator1/testing/Tester.scala b/src/library/scala/util/parsing/combinator1/testing/Tester.scala new file mode 100644 index 0000000000..e2f5d61e53 --- /dev/null +++ b/src/library/scala/util/parsing/combinator1/testing/Tester.scala @@ -0,0 +1,45 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2007, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.util.parsing.combinator1.testing + +import scala.util.parsing.combinator1.lexical.Lexical +import scala.util.parsing.combinator1.syntactical.TokenParsers + +/**

+ * Facilitates testing a given parser on various input strings. + *

+ *

+ * Example use: + *

+ *    val syntactic = new MyParsers
+ *

+ * and + *

+ *    val parser = syntactic.term
+ *

+ * (if MyParsers extends TokenParsers with a parser called `term') + *

+ * + * @author Martin Odersky, Adriaan Moors + */ +abstract class Tester { + + val syntactic: TokenParsers { val lexical: Lexical } + val parser: syntactic.Parser[Any] + + + /** Scans a String (using a `syntactic.lexical.Scanner'), parses it + * using phrase(parser), and prints the input and the + * parsed result to the console. + */ + def test(in: String) { + Console.println("\nin : "+in) + Console.println(syntactic.phrase[Any](parser)(new syntactic.lexical.Scanner(in))) + } +} -- cgit v1.2.3