From 46a4635d3acc0a18869131879e6cde862d6b9776 Mon Sep 17 00:00:00 2001 From: Adriaan Moors Date: Mon, 24 Jun 2013 16:41:53 -0700 Subject: Spin off parser combinators to scala-parser-combinators.jar. --- .gitignore | 2 +- build.xml | 44 +- src/build/bnd/scala-parser-combinators.bnd | 5 + src/build/maven/maven-deploy.xml | 1 + src/build/maven/scala-parser-combinators-pom.xml | 59 ++ src/build/pack.xml | 5 + .../parsing/combinator/ImplicitConversions.scala | 43 - .../util/parsing/combinator/JavaTokenParsers.scala | 62 -- .../util/parsing/combinator/PackratParsers.scala | 312 ------- .../scala/util/parsing/combinator/Parsers.scala | 919 --------------------- .../util/parsing/combinator/RegexParsers.scala | 166 ---- .../util/parsing/combinator/lexical/Lexical.scala | 40 - .../util/parsing/combinator/lexical/Scanners.scala | 63 -- .../parsing/combinator/lexical/StdLexical.scala | 87 -- .../syntactical/StandardTokenParsers.scala | 32 - .../combinator/syntactical/StdTokenParsers.scala | 52 -- .../combinator/syntactical/TokenParsers.scala | 35 - .../util/parsing/combinator/token/StdTokens.scala | 39 - .../util/parsing/combinator/token/Tokens.scala | 43 - .../scala/util/parsing/input/CharArrayReader.scala | 35 - .../util/parsing/input/CharSequenceReader.scala | 66 -- .../scala/util/parsing/input/NoPosition.scala | 25 - .../scala/util/parsing/input/OffsetPosition.scala | 73 -- .../scala/util/parsing/input/PagedSeqReader.scala | 71 -- .../scala/util/parsing/input/Position.scala | 62 -- .../scala/util/parsing/input/Positional.scala | 30 - src/library/scala/util/parsing/input/Reader.scala | 62 -- .../scala/util/parsing/input/StreamReader.scala | 76 -- src/library/scala/util/parsing/json/JSON.scala | 97 --- src/library/scala/util/parsing/json/Lexer.scala | 90 -- src/library/scala/util/parsing/json/Parser.scala | 147 ---- .../parsing/combinator/ImplicitConversions.scala | 43 + .../util/parsing/combinator/JavaTokenParsers.scala | 62 ++ 
.../util/parsing/combinator/PackratParsers.scala | 312 +++++++ .../scala/util/parsing/combinator/Parsers.scala | 919 +++++++++++++++++++++ .../util/parsing/combinator/RegexParsers.scala | 166 ++++ .../util/parsing/combinator/lexical/Lexical.scala | 40 + .../util/parsing/combinator/lexical/Scanners.scala | 63 ++ .../parsing/combinator/lexical/StdLexical.scala | 87 ++ .../syntactical/StandardTokenParsers.scala | 32 + .../combinator/syntactical/StdTokenParsers.scala | 52 ++ .../combinator/syntactical/TokenParsers.scala | 35 + .../util/parsing/combinator/token/StdTokens.scala | 39 + .../util/parsing/combinator/token/Tokens.scala | 43 + .../scala/util/parsing/input/CharArrayReader.scala | 35 + .../util/parsing/input/CharSequenceReader.scala | 66 ++ .../scala/util/parsing/input/NoPosition.scala | 25 + .../scala/util/parsing/input/OffsetPosition.scala | 73 ++ .../scala/util/parsing/input/PagedSeqReader.scala | 71 ++ .../scala/util/parsing/input/Position.scala | 62 ++ .../scala/util/parsing/input/Positional.scala | 30 + .../scala/util/parsing/input/Reader.scala | 62 ++ .../scala/util/parsing/input/StreamReader.scala | 76 ++ .../scala/util/parsing/json/JSON.scala | 97 +++ .../scala/util/parsing/json/Lexer.scala | 90 ++ .../scala/util/parsing/json/Parser.scala | 147 ++++ .../scala/tools/partest/nest/FileManager.scala | 3 +- test/partest | 2 +- 58 files changed, 2837 insertions(+), 2738 deletions(-) create mode 100644 src/build/bnd/scala-parser-combinators.bnd create mode 100644 src/build/maven/scala-parser-combinators-pom.xml delete mode 100644 src/library/scala/util/parsing/combinator/ImplicitConversions.scala delete mode 100644 src/library/scala/util/parsing/combinator/JavaTokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/PackratParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/Parsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/RegexParsers.scala delete mode 100644 
src/library/scala/util/parsing/combinator/lexical/Lexical.scala delete mode 100644 src/library/scala/util/parsing/combinator/lexical/Scanners.scala delete mode 100644 src/library/scala/util/parsing/combinator/lexical/StdLexical.scala delete mode 100644 src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/token/StdTokens.scala delete mode 100644 src/library/scala/util/parsing/combinator/token/Tokens.scala delete mode 100644 src/library/scala/util/parsing/input/CharArrayReader.scala delete mode 100644 src/library/scala/util/parsing/input/CharSequenceReader.scala delete mode 100644 src/library/scala/util/parsing/input/NoPosition.scala delete mode 100644 src/library/scala/util/parsing/input/OffsetPosition.scala delete mode 100644 src/library/scala/util/parsing/input/PagedSeqReader.scala delete mode 100644 src/library/scala/util/parsing/input/Position.scala delete mode 100644 src/library/scala/util/parsing/input/Positional.scala delete mode 100644 src/library/scala/util/parsing/input/Reader.scala delete mode 100644 src/library/scala/util/parsing/input/StreamReader.scala delete mode 100644 src/library/scala/util/parsing/json/JSON.scala delete mode 100644 src/library/scala/util/parsing/json/Lexer.scala delete mode 100644 src/library/scala/util/parsing/json/Parser.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/JavaTokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/PackratParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/Parsers.scala create mode 100644 
src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/NoPosition.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/Position.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/Positional.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/Reader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/StreamReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/json/JSON.scala create mode 100644 src/parser-combinators/scala/util/parsing/json/Lexer.scala create mode 100644 src/parser-combinators/scala/util/parsing/json/Parser.scala diff --git a/.gitignore b/.gitignore index 378eac25d3..84c048a73c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -build 
+/build/ diff --git a/build.xml b/build.xml index 854bb9c68b..6906c15a19 100755 --- a/build.xml +++ b/build.xml @@ -472,7 +472,7 @@ TODO: There must be a variable of the shape @{stage}.@{project}.build.path for all @{stage} in locker, quick, strap and all @{project} in library, reflect, compiler - when stage is quick, @{project} also includes: actors, repl, xml, swing, plugins, scalacheck, interactive, scaladoc, partest, scalap + when stage is quick, @{project} also includes: actors, parser-combinators, xml, repl, swing, plugins, scalacheck, interactive, scaladoc, partest, scalap --> @@ -510,6 +510,11 @@ TODO: + + + + + @@ -545,6 +550,7 @@ TODO: + @@ -566,6 +572,7 @@ TODO: + @@ -577,6 +584,7 @@ TODO: + @@ -596,6 +604,8 @@ TODO: + + @@ -622,7 +632,8 @@ TODO: - + + @@ -663,6 +674,7 @@ TODO: + @@ -689,6 +701,7 @@ TODO: + @@ -718,6 +731,7 @@ TODO: + @@ -1162,6 +1176,9 @@ TODO: + + + @@ -1171,7 +1188,7 @@ TODO: - + @@ -1180,7 +1197,7 @@ TODO: - + @@ -1225,7 +1242,7 @@ TODO: - + @@ -1240,7 +1257,8 @@ TODO: - + + @@ -1277,7 +1295,7 @@ TODO: - + @@ -1381,6 +1399,7 @@ TODO: + @@ -1707,6 +1726,12 @@ TODO: + + + + + + @@ -1775,7 +1800,7 @@ TODO: - + "+ r) - r - } - - /** A parser generator for repetitions. - * - * `rep(p)` repeatedly uses `p` to parse the input until `p` fails - * (the result is a List of the consecutive results of `p`). - * - * @param p a `Parser` that is to be applied successively to the input - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input. - */ - def rep[T](p: => Parser[T]): Parser[List[T]] = rep1(p) | success(List()) - - /** A parser generator for interleaved repetitions. - * - * `repsep(p, q)` repeatedly uses `p` interleaved with `q` to parse the input, until `p` fails. - * (The result is a `List` of the results of `p`.) - * - * Example: `repsep(term, ",")` parses a comma-separated list of term's, yielding a list of these terms. 
- * - * @param p a `Parser` that is to be applied successively to the input - * @param q a `Parser` that parses the elements that separate the elements parsed by `p` - * @return A parser that returns a list of results produced by repeatedly applying `p` (interleaved with `q`) to the input. - * The results of `p` are collected in a list. The results of `q` are discarded. - */ - def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = - rep1sep(p, q) | success(List()) - - /** A parser generator for non-empty repetitions. - * - * `rep1(p)` repeatedly uses `p` to parse the input until `p` fails -- `p` must succeed at least - * once (the result is a `List` of the consecutive results of `p`) - * - * @param p a `Parser` that is to be applied successively to the input - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input - * (and that only succeeds if `p` matches at least once). - */ - def rep1[T](p: => Parser[T]): Parser[List[T]] = rep1(p, p) - - /** A parser generator for non-empty repetitions. - * - * `rep1(f, p)` first uses `f` (which must succeed) and then repeatedly - * uses `p` to parse the input until `p` fails - * (the result is a `List` of the consecutive results of `f` and `p`) - * - * @param first a `Parser` that parses the first piece of input - * @param p0 a `Parser` that is to be applied successively to the rest of the input (if any) -- evaluated at most once, and only when necessary - * @return A parser that returns a list of results produced by first applying `f` and then - * repeatedly `p` to the input (it only succeeds if `f` matches). 
- */ - @migration("The `p0` call-by-name arguments is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") - def rep1[T](first: => Parser[T], p0: => Parser[T]): Parser[List[T]] = Parser { in => - lazy val p = p0 // lazy argument - val elems = new ListBuffer[T] - - def continue(in: Input): ParseResult[List[T]] = { - val p0 = p // avoid repeatedly re-evaluating by-name parser - @tailrec def applyp(in0: Input): ParseResult[List[T]] = p0(in0) match { - case Success(x, rest) => elems += x ; applyp(rest) - case e @ Error(_, _) => e // still have to propagate error - case _ => Success(elems.toList, in0) - } - - applyp(in) - } - - first(in) match { - case Success(x, rest) => elems += x ; continue(rest) - case ns: NoSuccess => ns - } - } - - /** A parser generator for a specified number of repetitions. - * - * `repN(n, p)` uses `p` exactly `n` time to parse the input - * (the result is a `List` of the `n` consecutive results of `p`). - * - * @param p a `Parser` that is to be applied successively to the input - * @param num the exact number of times `p` must succeed - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input - * (and that only succeeds if `p` matches exactly `n` times). - */ - def repN[T](num: Int, p: => Parser[T]): Parser[List[T]] = - if (num == 0) success(Nil) else Parser { in => - val elems = new ListBuffer[T] - val p0 = p // avoid repeatedly re-evaluating by-name parser - - @tailrec def applyp(in0: Input): ParseResult[List[T]] = - if (elems.length == num) Success(elems.toList, in0) - else p0(in0) match { - case Success(x, rest) => elems += x ; applyp(rest) - case ns: NoSuccess => ns - } - - applyp(in) - } - - /** A parser generator for non-empty repetitions. - * - * `rep1sep(p, q)` repeatedly applies `p` interleaved with `q` to parse the - * input, until `p` fails. The parser `p` must succeed at least once. 
- * - * @param p a `Parser` that is to be applied successively to the input - * @param q a `Parser` that parses the elements that separate the elements parsed by `p` - * (interleaved with `q`) - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input - * (and that only succeeds if `p` matches at least once). - * The results of `p` are collected in a list. The results of `q` are discarded. - */ - def rep1sep[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]] = - p ~ rep(q ~> p) ^^ {case x~y => x::y} - - /** A parser generator that, roughly, generalises the rep1sep generator so - * that `q`, which parses the separator, produces a left-associative - * function that combines the elements it separates. - * - * ''From: J. Fokker. Functional parsers. In J. Jeuring and E. Meijer, editors, Advanced Functional Programming, - * volume 925 of Lecture Notes in Computer Science, pages 1--23. Springer, 1995.'' - * - * @param p a parser that parses the elements - * @param q a parser that parses the token(s) separating the elements, yielding a left-associative function that - * combines two elements into one - */ - def chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T] - = chainl1(p, p, q) - - /** A parser generator that, roughly, generalises the `rep1sep` generator - * so that `q`, which parses the separator, produces a left-associative - * function that combines the elements it separates. 
- * - * @param first a parser that parses the first element - * @param p a parser that parses the subsequent elements - * @param q a parser that parses the token(s) separating the elements, - * yielding a left-associative function that combines two elements - * into one - */ - def chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T] - = first ~ rep(q ~ p) ^^ { - case x ~ xs => xs.foldLeft(x: T){case (a, f ~ b) => f(a, b)} // x's type annotation is needed to deal with changed type inference due to SI-5189 - } - - /** A parser generator that generalises the `rep1sep` generator so that `q`, - * which parses the separator, produces a right-associative function that - * combines the elements it separates. Additionally, the right-most (last) - * element and the left-most combining function have to be supplied. - * - * rep1sep(p: Parser[T], q) corresponds to chainr1(p, q ^^ cons, cons, Nil) (where val cons = (x: T, y: List[T]) => x :: y) - * - * @param p a parser that parses the elements - * @param q a parser that parses the token(s) separating the elements, yielding a right-associative function that - * combines two elements into one - * @param combine the "last" (left-most) combination function to be applied - * @param first the "first" (right-most) element to be combined - */ - def chainr1[T, U](p: => Parser[T], q: => Parser[(T, U) => U], combine: (T, U) => U, first: U): Parser[U] - = p ~ rep(q ~ p) ^^ { - case x ~ xs => (new ~(combine, x) :: xs).foldRight(first){case (f ~ a, b) => f(a, b)} - } - - /** A parser generator for optional sub-phrases. - * - * `opt(p)` is a parser that returns `Some(x)` if `p` returns `x` and `None` if `p` fails. 
- * - * @param p A `Parser` that is tried on the input - * @return a `Parser` that always succeeds: either with the result provided by `p` or - * with the empty result - */ - def opt[T](p: => Parser[T]): Parser[Option[T]] = - p ^^ (x => Some(x)) | success(None) - - /** Wrap a parser so that its failures and errors become success and - * vice versa -- it never consumes any input. - */ - def not[T](p: => Parser[T]): Parser[Unit] = Parser { in => - p(in) match { - case Success(_, _) => Failure("Expected failure", in) - case _ => Success((), in) - } - } - - /** A parser generator for guard expressions. The resulting parser will - * fail or succeed just like the one given as parameter but it will not - * consume any input. - * - * @param p a `Parser` that is to be applied to the input - * @return A parser that returns success if and only if `p` succeeds but - * never consumes any input - */ - def guard[T](p: => Parser[T]): Parser[T] = Parser { in => - p(in) match{ - case s@ Success(s1,_) => Success(s1, in) - case e => e - } - } - - /** `positioned` decorates a parser's result with the start position of the - * input it consumed. - * - * @param p a `Parser` whose result conforms to `Positional`. - * @return A parser that has the same behaviour as `p`, but which marks its - * result with the start position of the input it consumed, - * if it didn't already have a position. - */ - def positioned[T <: Positional](p: => Parser[T]): Parser[T] = Parser { in => - p(in) match { - case Success(t, in1) => Success(if (t.pos == NoPosition) t setPos in.pos else t, in1) - case ns: NoSuccess => ns - } - } - - /** A parser generator delimiting whole phrases (i.e. programs). - * - * `phrase(p)` succeeds if `p` succeeds and no input is left over after `p`. - * - * @param p the parser that must consume all input for the resulting parser - * to succeed. - * @return a parser that has the same result as `p`, but that only succeeds - * if `p` consumed all the input. 
- */ - def phrase[T](p: Parser[T]) = new Parser[T] { - def apply(in: Input) = lastNoSuccessVar.withValue(None) { - p(in) match { - case s @ Success(out, in1) => - if (in1.atEnd) - s - else - lastNoSuccessVar.value filterNot { _.next.pos < in1.pos } getOrElse Failure("end of input expected", in1) - case ns => lastNoSuccessVar.value.getOrElse(ns) - } - } - } - - /** Given a concatenation with a repetition (list), move the concatenated element into the list */ - def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } - - /** A wrapper over sequence of matches. - * - * Given `p1: Parser[A]` and `p2: Parser[B]`, a parser composed with - * `p1 ~ p2` will have type `Parser[~[A, B]]`. The successful result - * of the parser can be extracted from this case class. - * - * It also enables pattern matching, so something like this is possible: - * - * {{{ - * def concat(p1: Parser[String], p2: Parser[String]): Parser[String] = - * p1 ~ p2 ^^ { case a ~ b => a + b } - * }}} - */ - case class ~[+a, +b](_1: a, _2: b) { - override def toString = "("+ _1 +"~"+ _2 +")" - } - - /** A parser whose `~` combinator disallows back-tracking. 
- */ - trait OnceParser[+T] extends Parser[T] { - override def ~ [U](p: => Parser[U]): Parser[~[T, U]] - = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~") } - } -} diff --git a/src/library/scala/util/parsing/combinator/RegexParsers.scala b/src/library/scala/util/parsing/combinator/RegexParsers.scala deleted file mode 100644 index 8ebbc573ad..0000000000 --- a/src/library/scala/util/parsing/combinator/RegexParsers.scala +++ /dev/null @@ -1,166 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.combinator - -import java.util.regex.Pattern -import scala.util.matching.Regex -import scala.util.parsing.input._ -import scala.collection.immutable.PagedSeq -import scala.language.implicitConversions - -/** The ''most important'' differences between `RegexParsers` and - * [[scala.util.parsing.combinator.Parsers]] are: - * - * - `Elem` is defined to be [[scala.Char]] - * - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`, - * so that string literals can be used as parser combinators. - * - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`, - * so that regex expressions can be used as parser combinators. - * - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true, - * skip any whitespace before each parser is called. - * - Protected val `whiteSpace` returns a regex that identifies whitespace. 
- * - * For example, this creates a very simple calculator receiving `String` input: - * - * {{{ - * object Calculator extends RegexParsers { - * def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble } - * def factor: Parser[Double] = number | "(" ~> expr <~ ")" - * def term : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ { - * case number ~ list => (number /: list) { - * case (x, "*" ~ y) => x * y - * case (x, "/" ~ y) => x / y - * } - * } - * def expr : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ { - * case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /: - * case (x, "+" ~ y) => x + y - * case (x, "-" ~ y) => x - y - * } - * } - * - * def apply(input: String): Double = parseAll(expr, input) match { - * case Success(result, _) => result - * case failure : NoSuccess => scala.sys.error(failure.msg) - * } - * } - * }}} - */ -trait RegexParsers extends Parsers { - - type Elem = Char - - protected val whiteSpace = """\s+""".r - - def skipWhitespace = whiteSpace.toString.length > 0 - - /** Method called to handle whitespace before parsers. - * - * It checks `skipWhitespace` and, if true, skips anything - * matching `whiteSpace` starting from the current offset. - * - * @param source The input being parsed. - * @param offset The offset into `source` from which to match. - * @return The offset to be used for the next parser. 
- */ - protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = - if (skipWhitespace) - (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match { - case Some(matched) => offset + matched.end - case None => offset - } - else - offset - - /** A parser that matches a literal string */ - implicit def literal(s: String): Parser[String] = new Parser[String] { - def apply(in: Input) = { - val source = in.source - val offset = in.offset - val start = handleWhiteSpace(source, offset) - var i = 0 - var j = start - while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) { - i += 1 - j += 1 - } - if (i == s.length) - Success(source.subSequence(start, j).toString, in.drop(j - offset)) - else { - val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" - Failure("`"+s+"' expected but "+found+" found", in.drop(start - offset)) - } - } - } - - /** A parser that matches a regex string */ - implicit def regex(r: Regex): Parser[String] = new Parser[String] { - def apply(in: Input) = { - val source = in.source - val offset = in.offset - val start = handleWhiteSpace(source, offset) - (r findPrefixMatchOf (source.subSequence(start, source.length))) match { - case Some(matched) => - Success(source.subSequence(start, start + matched.end).toString, - in.drop(start + matched.end - offset)) - case None => - val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" - Failure("string matching regex `"+r+"' expected but "+found+" found", in.drop(start - offset)) - } - } - } - - /** `positioned` decorates a parser's result with the start position of the input it consumed. - * If whitespace is being skipped, then it is skipped before the start position is recorded. - * - * @param p a `Parser` whose result conforms to `Positional`. 
- * @return A parser that has the same behaviour as `p`, but which marks its result with the - * start position of the input it consumed after whitespace has been skipped, if it - * didn't already have a position. - */ - override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = { - val pp = super.positioned(p) - new Parser[T] { - def apply(in: Input) = { - val offset = in.offset - val start = handleWhiteSpace(in.source, offset) - pp(in.drop (start - offset)) - } - } - } - - override def phrase[T](p: Parser[T]): Parser[T] = - super.phrase(p <~ opt("""\z""".r)) - - /** Parse some prefix of reader `in` with parser `p`. */ - def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = - p(in) - - /** Parse some prefix of character sequence `in` with parser `p`. */ - def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = - p(new CharSequenceReader(in)) - - /** Parse some prefix of reader `in` with parser `p`. */ - def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = - p(new PagedSeqReader(PagedSeq.fromReader(in))) - - /** Parse all of reader `in` with parser `p`. */ - def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = - parse(phrase(p), in) - - /** Parse all of reader `in` with parser `p`. */ - def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = - parse(phrase(p), in) - - /** Parse all of character sequence `in` with parser `p`. 
*/ - def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = - parse(phrase(p), in) -} diff --git a/src/library/scala/util/parsing/combinator/lexical/Lexical.scala b/src/library/scala/util/parsing/combinator/lexical/Lexical.scala deleted file mode 100644 index d8029d068f..0000000000 --- a/src/library/scala/util/parsing/combinator/lexical/Lexical.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing -package combinator -package lexical - -import token._ -import input.CharArrayReader.EofCh - -/** This component complements the `Scanners` component with - * common operations for lexical parsers. - * - * Refer to [[scala.util.parsing.combinator.lexical.StdLexical]] - * for a concrete implementation for a simple, Scala-like language. - * - * @author Martin Odersky, Adriaan Moors - */ -abstract class Lexical extends Scanners with Tokens { - - /** A character-parser that matches a letter (and returns it).*/ - def letter = elem("letter", _.isLetter) - - /** A character-parser that matches a digit (and returns it).*/ - def digit = elem("digit", _.isDigit) - - /** A character-parser that matches any character except the ones given in `cs` (and returns it).*/ - def chrExcept(cs: Char*) = elem("", ch => (cs forall (ch != _))) - - /** A character-parser that matches a white-space character (and returns it).*/ - def whitespaceChar = elem("space char", ch => ch <= ' ' && ch != EofCh) -} diff --git a/src/library/scala/util/parsing/combinator/lexical/Scanners.scala b/src/library/scala/util/parsing/combinator/lexical/Scanners.scala deleted file mode 100644 index 2e12915bb8..0000000000 --- a/src/library/scala/util/parsing/combinator/lexical/Scanners.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala 
API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package lexical - -import input._ - -/** This component provides core functionality for lexical parsers. - * - * See its subclasses [[scala.util.parsing.combinator.lexical.Lexical]] and -- most interestingly - * [[scala.util.parsing.combinator.lexical.StdLexical]], for more functionality. - * - * @author Martin Odersky, Adriaan Moors - */ -trait Scanners extends Parsers { - type Elem = Char - type Token - - /** This token is produced by a scanner `Scanner` when scanning failed. */ - def errorToken(msg: String): Token - - /** A parser that produces a token (from a stream of characters). */ - def token: Parser[Token] - - /** A parser for white-space -- its result will be discarded. */ - def whitespace: Parser[Any] - - /** `Scanner` is essentially¹ a parser that produces `Token`s - * from a stream of characters. The tokens it produces are typically - * passed to parsers in `TokenParsers`. 
- * - * @note ¹ `Scanner` is really a `Reader` of `Token`s - */ - class Scanner(in: Reader[Char]) extends Reader[Token] { - /** Convenience constructor (makes a character reader out of the given string) */ - def this(in: String) = this(new CharArrayReader(in.toCharArray())) - private val (tok, rest1, rest2) = whitespace(in) match { - case Success(_, in1) => - token(in1) match { - case Success(tok, in2) => (tok, in1, in2) - case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) - } - case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) - } - private def skip(in: Reader[Char]) = if (in.atEnd) in else in.rest - - override def source: java.lang.CharSequence = in.source - override def offset: Int = in.offset - def first = tok - def rest = new Scanner(rest2) - def pos = rest1.pos - def atEnd = in.atEnd || (whitespace(in) match { case Success(_, in1) => in1.atEnd case _ => false }) - } -} - diff --git a/src/library/scala/util/parsing/combinator/lexical/StdLexical.scala b/src/library/scala/util/parsing/combinator/lexical/StdLexical.scala deleted file mode 100644 index 32d7502cda..0000000000 --- a/src/library/scala/util/parsing/combinator/lexical/StdLexical.scala +++ /dev/null @@ -1,87 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package lexical - -import token._ -import input.CharArrayReader.EofCh -import scala.collection.mutable - -/** This component provides a standard lexical parser for a simple, - * [[http://scala-lang.org Scala]]-like language. It parses keywords and - * identifiers, numeric literals (integers), strings, and delimiters. - * - * To distinguish between identifiers and keywords, it uses a set of - * reserved identifiers: every string contained in `reserved` is returned - * as a keyword token. 
(Note that `=>` is hard-coded as a keyword.) - * Additionally, the kinds of delimiters can be specified by the - * `delimiters` set. - * - * Usually this component is used to break character-based input into - * bigger tokens, which are then passed to a token-parser (see - * [[scala.util.parsing.combinator.syntactical.TokenParsers]].) - * - * @author Martin Odersky - * @author Iulian Dragos - * @author Adriaan Moors - */ -class StdLexical extends Lexical with StdTokens { - // see `token` in `Scanners` - def token: Parser[Token] = - ( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") } - | digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") } - | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") } - | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") } - | EofCh ^^^ EOF - | '\'' ~> failure("unclosed string literal") - | '\"' ~> failure("unclosed string literal") - | delim - | failure("illegal character") - ) - - /** Returns the legal identifier chars, except digits. */ - def identChar = letter | elem('_') - - // see `whitespace in `Scanners` - def whitespace: Parser[Any] = rep[Any]( - whitespaceChar - | '/' ~ '*' ~ comment - | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') ) - | '/' ~ '*' ~ failure("unclosed comment") - ) - - protected def comment: Parser[Any] = ( - '*' ~ '/' ^^ { case _ => ' ' } - | chrExcept(EofCh) ~ comment - ) - - /** The set of reserved identifiers: these will be returned as `Keyword`s. */ - val reserved = new mutable.HashSet[String] - - /** The set of delimiters (ordering does not matter). 
*/ - val delimiters = new mutable.HashSet[String] - - protected def processIdent(name: String) = - if (reserved contains name) Keyword(name) else Identifier(name) - - private lazy val _delim: Parser[Token] = { - // construct parser for delimiters by |'ing together the parsers for the individual delimiters, - // starting with the longest one -- otherwise a delimiter D will never be matched if there is - // another delimiter that is a prefix of D - def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { x => Keyword(s) } - - val d = new Array[String](delimiters.size) - delimiters.copyToArray(d, 0) - scala.util.Sorting.quickSort(d) - (d.toList map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x) - } - protected def delim: Parser[Token] = _delim -} diff --git a/src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala b/src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala deleted file mode 100644 index 5b9d14c9a7..0000000000 --- a/src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing -package combinator -package syntactical - -import token._ -import lexical.StdLexical -import scala.language.implicitConversions - -/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. 
-* -* @author Martin Odersky, Adriaan Moors - */ -class StandardTokenParsers extends StdTokenParsers { - type Tokens = StdTokens - val lexical = new StdLexical - - //an implicit keyword function that gives a warning when a given word is not in the reserved/delimiters list - override implicit def keyword(chars : String): Parser[String] = - if(lexical.reserved.contains(chars) || lexical.delimiters.contains(chars)) super.keyword(chars) - else failure("You are trying to parse \""+chars+"\", but it is neither contained in the delimiters list, nor in the reserved keyword list of your lexical object") - -} diff --git a/src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala b/src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala deleted file mode 100644 index adcf85da7a..0000000000 --- a/src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing -package combinator -package syntactical - -import token._ -import scala.collection.mutable -import scala.language.implicitConversions - -/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. -* -* @author Martin Odersky, Adriaan Moors - */ -trait StdTokenParsers extends TokenParsers { - type Tokens <: StdTokens - import lexical.{Keyword, NumericLit, StringLit, Identifier} - - protected val keywordCache = mutable.HashMap[String, Parser[String]]() - - /** A parser which matches a single keyword token. - * - * @param chars The character string making up the matched keyword. 
- * @return a `Parser` that matches the given string - */ -// implicit def keyword(chars: String): Parser[String] = accept(Keyword(chars)) ^^ (_.chars) - implicit def keyword(chars: String): Parser[String] = - keywordCache.getOrElseUpdate(chars, accept(Keyword(chars)) ^^ (_.chars)) - - /** A parser which matches a numeric literal */ - def numericLit: Parser[String] = - elem("number", _.isInstanceOf[NumericLit]) ^^ (_.chars) - - /** A parser which matches a string literal */ - def stringLit: Parser[String] = - elem("string literal", _.isInstanceOf[StringLit]) ^^ (_.chars) - - /** A parser which matches an identifier */ - def ident: Parser[String] = - elem("identifier", _.isInstanceOf[Identifier]) ^^ (_.chars) -} - - diff --git a/src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala b/src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala deleted file mode 100644 index b06babcd7e..0000000000 --- a/src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing -package combinator -package syntactical - -/** This is the core component for token-based parsers. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait TokenParsers extends Parsers { - /** `Tokens` is the abstract type of the `Token`s consumed by the parsers in this component. */ - type Tokens <: token.Tokens - - /** `lexical` is the component responsible for consuming some basic kind of - * input (usually character-based) and turning it into the tokens - * understood by these parsers. 
- */ - val lexical: Tokens - - /** The input-type for these parsers*/ - type Elem = lexical.Token - -} - - diff --git a/src/library/scala/util/parsing/combinator/token/StdTokens.scala b/src/library/scala/util/parsing/combinator/token/StdTokens.scala deleted file mode 100644 index a102d1541e..0000000000 --- a/src/library/scala/util/parsing/combinator/token/StdTokens.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package token - -/** This component provides the standard `Token`s for a simple, Scala-like language. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait StdTokens extends Tokens { - /** The class of keyword tokens */ - case class Keyword(chars: String) extends Token { - override def toString = "`"+chars+"'" - } - - /** The class of numeric literal tokens */ - case class NumericLit(chars: String) extends Token { - override def toString = chars - } - - /** The class of string literal tokens */ - case class StringLit(chars: String) extends Token { - override def toString = "\""+chars+"\"" - } - - /** The class of identifier tokens */ - case class Identifier(chars: String) extends Token { - override def toString = "identifier "+chars - } -} diff --git a/src/library/scala/util/parsing/combinator/token/Tokens.scala b/src/library/scala/util/parsing/combinator/token/Tokens.scala deleted file mode 100644 index 5c3f1f95b5..0000000000 --- a/src/library/scala/util/parsing/combinator/token/Tokens.scala +++ /dev/null @@ -1,43 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package token - -/** This 
component provides the notion of `Token`, the unit of information that is passed from lexical - * parsers in the `Lexical` component to the parsers in the `TokenParsers` component. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait Tokens { - /** Objects of this type are produced by a lexical parser or ``scanner'', and consumed by a parser. - * - * @see [[scala.util.parsing.combinator.syntactical.TokenParsers]] - */ - abstract class Token { - def chars: String - } - - /** A class of error tokens. Error tokens are used to communicate - * errors detected during lexical analysis - */ - case class ErrorToken(msg: String) extends Token { - def chars = "*** error: "+msg - } - - /** A class for end-of-file tokens */ - case object EOF extends Token { - def chars = "" - } - - /** This token is produced by a scanner `Scanner` when scanning failed. */ - def errorToken(msg: String): Token = new ErrorToken(msg) -} diff --git a/src/library/scala/util/parsing/input/CharArrayReader.scala b/src/library/scala/util/parsing/input/CharArrayReader.scala deleted file mode 100644 index 22530cb9aa..0000000000 --- a/src/library/scala/util/parsing/input/CharArrayReader.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.input - -/** An object encapsulating basic character constants. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -object CharArrayReader { - final val EofCh = '\032' -} - -/** A character array reader reads a stream of characters (keeping track of their positions) - * from an array. 
- * - * @param chars an array of characters - * @param index starting offset into the array; the first element returned will be `source(index)` - * - * @author Martin Odersky - * @author Adriaan Moors - */ -class CharArrayReader(chars: Array[Char], index: Int) extends CharSequenceReader(chars, index) { - - def this(chars: Array[Char]) = this(chars, 0) - -} diff --git a/src/library/scala/util/parsing/input/CharSequenceReader.scala b/src/library/scala/util/parsing/input/CharSequenceReader.scala deleted file mode 100644 index 8e7751cc82..0000000000 --- a/src/library/scala/util/parsing/input/CharSequenceReader.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.input - -/** An object encapsulating basic character constants. - * - * @author Martin Odersky, Adriaan Moors - */ -object CharSequenceReader { - final val EofCh = '\032' -} - -/** A character array reader reads a stream of characters (keeping track of their positions) - * from an array. - * - * @param source the source sequence - * @param offset starting offset. - * - * @author Martin Odersky - */ -class CharSequenceReader(override val source: java.lang.CharSequence, - override val offset: Int) extends Reader[Char] { - import CharSequenceReader._ - - /** Construct a `CharSequenceReader` with its first element at - * `source(0)` and position `(1,1)`. - */ - def this(source: java.lang.CharSequence) = this(source, 0) - - /** Returns the first element of the reader, or EofCh if reader is at its end. - */ - def first = - if (offset < source.length) source.charAt(offset) else EofCh - - /** Returns a CharSequenceReader consisting of all elements except the first. 
- * - * @return If `atEnd` is `true`, the result will be `this`; - * otherwise, it's a `CharSequenceReader` containing the rest of input. - */ - def rest: CharSequenceReader = - if (offset < source.length) new CharSequenceReader(source, offset + 1) - else this - - /** The position of the first element in the reader. - */ - def pos: Position = new OffsetPosition(source, offset) - - /** true iff there are no more elements in this reader (except for trailing - * EofCh's) - */ - def atEnd = offset >= source.length - - /** Returns an abstract reader consisting of all elements except the first - * `n` elements. - */ - override def drop(n: Int): CharSequenceReader = - new CharSequenceReader(source, offset + n) -} diff --git a/src/library/scala/util/parsing/input/NoPosition.scala b/src/library/scala/util/parsing/input/NoPosition.scala deleted file mode 100644 index 4a32264b79..0000000000 --- a/src/library/scala/util/parsing/input/NoPosition.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.input - -/** Undefined position. 
- * - * @author Martin Odersky - * @author Adriaan Moors - */ -object NoPosition extends Position { - def line = 0 - def column = 0 - override def toString = "" - override def longString = toString - def lineContents = "" -} diff --git a/src/library/scala/util/parsing/input/OffsetPosition.scala b/src/library/scala/util/parsing/input/OffsetPosition.scala deleted file mode 100644 index 23f79c74d1..0000000000 --- a/src/library/scala/util/parsing/input/OffsetPosition.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -import scala.collection.mutable.ArrayBuffer - -/** `OffsetPosition` is a standard class for positions - * represented as offsets into a source ``document''. - * - * @param source The source document - * @param offset The offset indicating the position - * - * @author Martin Odersky - */ -case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends Position { - - /** An index that contains all line starts, including first line, and eof. */ - private lazy val index: Array[Int] = { - val lineStarts = new ArrayBuffer[Int] - lineStarts += 0 - for (i <- 0 until source.length) - if (source.charAt(i) == '\n') lineStarts += (i + 1) - lineStarts += source.length - lineStarts.toArray - } - - /** The line number referred to by the position; line numbers start at 1. */ - def line: Int = { - var lo = 0 - var hi = index.length - 1 - while (lo + 1 < hi) { - val mid = (hi + lo) / 2 - if (offset < index(mid)) hi = mid - else lo = mid - } - lo + 1 - } - - /** The column number referred to by the position; column numbers start at 1. */ - def column: Int = offset - index(line - 1) + 1 - - /** The contents of the line numbered at the current offset. 
- * - * @return the line at `offset` (not including a newline) - */ - def lineContents: String = - source.subSequence(index(line - 1), index(line)).toString - - /** Returns a string representation of the `Position`, of the form `line.column`. */ - override def toString = line+"."+column - - /** Compare this position to another, by first comparing their line numbers, - * and then -- if necessary -- using the columns to break a tie. - * - * @param that a `Position` to compare to this `Position` - * @return true if this position's line number or (in case of equal line numbers) - * column is smaller than the corresponding components of `that` - */ - override def <(that: Position) = that match { - case OffsetPosition(_, that_offset) => - this.offset < that_offset - case _ => - this.line < that.line || - this.line == that.line && this.column < that.column - } -} diff --git a/src/library/scala/util/parsing/input/PagedSeqReader.scala b/src/library/scala/util/parsing/input/PagedSeqReader.scala deleted file mode 100644 index 468f1f9a5f..0000000000 --- a/src/library/scala/util/parsing/input/PagedSeqReader.scala +++ /dev/null @@ -1,71 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.input - -import scala.collection.immutable.PagedSeq - -/** An object encapsulating basic character constants. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -object PagedSeqReader { - final val EofCh = '\032' -} - -/** A character array reader reads a stream of characters (keeping track of their positions) - * from an array. - * - * @param seq the source sequence - * @param offset starting offset. 
- * - * @author Martin Odersky - */ -class PagedSeqReader(seq: PagedSeq[Char], - override val offset: Int) extends Reader[Char] { - import PagedSeqReader._ - - override lazy val source: java.lang.CharSequence = seq - - /** Construct a `PagedSeqReader` with its first element at - * `source(0)` and position `(1,1)`. - */ - def this(seq: PagedSeq[Char]) = this(seq, 0) - - /** Returns the first element of the reader, or EofCh if reader is at its end - */ - def first = - if (seq.isDefinedAt(offset)) seq(offset) else EofCh - - /** Returns a PagedSeqReader consisting of all elements except the first - * - * @return If `atEnd` is `true`, the result will be `this`; - * otherwise, it's a `PagedSeqReader` containing the rest of input. - */ - def rest: PagedSeqReader = - if (seq.isDefinedAt(offset)) new PagedSeqReader(seq, offset + 1) - else this - - /** The position of the first element in the reader. - */ - def pos: Position = new OffsetPosition(source, offset) - - /** true iff there are no more elements in this reader (except for trailing - * EofCh's). - */ - def atEnd = !seq.isDefinedAt(offset) - - /** Returns an abstract reader consisting of all elements except the first - * `n` elements. - */ - override def drop(n: Int): PagedSeqReader = - new PagedSeqReader(seq, offset + n) -} diff --git a/src/library/scala/util/parsing/input/Position.scala b/src/library/scala/util/parsing/input/Position.scala deleted file mode 100644 index b7995a6471..0000000000 --- a/src/library/scala/util/parsing/input/Position.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -/** `Position` is the base trait for objects describing a position in a `document`. 
- * - * It provides functionality for: - * - generating a visual representation of this position (`longString`); - * - comparing two positions (`<`). - * - * To use this class for a concrete kind of `document`, implement the `lineContents` method. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait Position { - - /** The line number referred to by the position; line numbers start at 1. */ - def line: Int - - /** The column number referred to by the position; column numbers start at 1. */ - def column: Int - - /** The contents of the line at this position. (must not contain a new-line character). - */ - protected def lineContents: String - - /** Returns a string representation of the `Position`, of the form `line.column`. */ - override def toString = ""+line+"."+column - - /** Returns a more ``visual'' representation of this position. - * More precisely, the resulting string consists of two lines: - * 1. the line in the document referred to by this position - * 2. a caret indicating the column - * - * Example: - * {{{ - * List(this, is, a, line, from, the, document) - * ^ - * }}} - */ - def longString = lineContents+"\n"+lineContents.take(column-1).map{x => if (x == '\t') x else ' ' } + "^" - - /** Compare this position to another, by first comparing their line numbers, - * and then -- if necessary -- using the columns to break a tie. 
- * - * @param `that` a `Position` to compare to this `Position` - * @return true if this position's line number or (in case of equal line numbers) - * column is smaller than the corresponding components of `that` - */ - def <(that: Position) = { - this.line < that.line || - this.line == that.line && this.column < that.column - } -} diff --git a/src/library/scala/util/parsing/input/Positional.scala b/src/library/scala/util/parsing/input/Positional.scala deleted file mode 100644 index cfde67cadd..0000000000 --- a/src/library/scala/util/parsing/input/Positional.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -/** A trait for objects that have a source position. - * - * @author Martin Odersky, Adriaan Moors - */ -trait Positional { - - /** The source position of this object, initially set to undefined. */ - var pos: Position = NoPosition - - /** If current source position is undefined, update it with given position `newpos` - * @return the object itself - */ - def setPos(newpos: Position): this.type = { - if (pos eq NoPosition) pos = newpos - this - } -} - - diff --git a/src/library/scala/util/parsing/input/Reader.scala b/src/library/scala/util/parsing/input/Reader.scala deleted file mode 100644 index 9dbf08a7ca..0000000000 --- a/src/library/scala/util/parsing/input/Reader.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.input - - -/** An interface for streams of values that have positions. 
- * - * @author Martin Odersky - * @author Adriaan Moors - */ -abstract class Reader[+T] { - - /** If this is a reader over character sequences, the underlying char sequence. - * If not, throws a `NoSuchMethodError` exception. - * - * @throws [[java.lang.NoSuchMethodError]] if this not a char sequence reader. - */ - def source: java.lang.CharSequence = - throw new NoSuchMethodError("not a char sequence reader") - - def offset: Int = - throw new NoSuchMethodError("not a char sequence reader") - - /** Returns the first element of the reader - */ - def first: T - - /** Returns an abstract reader consisting of all elements except the first - * - * @return If `atEnd` is `true`, the result will be `this'; - * otherwise, it's a `Reader` containing more elements. - */ - def rest: Reader[T] - - /** Returns an abstract reader consisting of all elements except the first `n` elements. - */ - def drop(n: Int): Reader[T] = { - var r: Reader[T] = this - var cnt = n - while (cnt > 0) { - r = r.rest; cnt -= 1 - } - r - } - - /** The position of the first element in the reader. - */ - def pos: Position - - /** `true` iff there are no more elements in this reader. - */ - def atEnd: Boolean -} diff --git a/src/library/scala/util/parsing/input/StreamReader.scala b/src/library/scala/util/parsing/input/StreamReader.scala deleted file mode 100644 index 30eb097fd7..0000000000 --- a/src/library/scala/util/parsing/input/StreamReader.scala +++ /dev/null @@ -1,76 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -import java.io.BufferedReader -import scala.collection.immutable.PagedSeq - -/** An object to create a `StreamReader` from a `java.io.Reader`. 
- * - * @author Miles Sabin - */ -object StreamReader { - final val EofCh = '\032' - - /** Create a `StreamReader` from a `java.io.Reader`. - * - * @param in the `java.io.Reader` that provides the underlying - * stream of characters for this Reader. - */ - def apply(in: java.io.Reader): StreamReader = { - new StreamReader(PagedSeq.fromReader(in), 0, 1) - } -} - -/** A StreamReader reads from a character sequence, typically created as a PagedSeq - * from a java.io.Reader - * - * NOTE: - * StreamReaders do not really fulfill the new contract for readers, which - * requires a `source` CharSequence representing the full input. - * Instead source is treated line by line. - * As a consequence, regex matching cannot extend beyond a single line - * when a StreamReader are used for input. - * - * If you need to match regexes spanning several lines you should consider - * class `PagedSeqReader` instead. - * - * @author Miles Sabin - * @author Martin Odersky - */ -sealed class StreamReader(seq: PagedSeq[Char], off: Int, lnum: Int) extends PagedSeqReader(seq, off) { - import StreamReader._ - - override def rest: StreamReader = - if (off == seq.length) this - else if (seq(off) == '\n') - new StreamReader(seq.slice(off + 1), 0, lnum + 1) - else new StreamReader(seq, off + 1, lnum) - - private def nextEol = { - var i = off - while (i < seq.length && seq(i) != '\n' && seq(i) != EofCh) i += 1 - i - } - - override def drop(n: Int): StreamReader = { - val eolPos = nextEol - if (eolPos < off + n && eolPos < seq.length) - new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1).drop(off + n - (eolPos + 1)) - else - new StreamReader(seq, off + n, lnum) - } - - override def pos: Position = new Position { - def line = lnum - def column = off + 1 - def lineContents = seq.slice(0, nextEol).toString - } -} diff --git a/src/library/scala/util/parsing/json/JSON.scala b/src/library/scala/util/parsing/json/JSON.scala deleted file mode 100644 index b06dddf532..0000000000 --- 
a/src/library/scala/util/parsing/json/JSON.scala +++ /dev/null @@ -1,97 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.json - -/** - * This object provides a simple interface to the JSON parser class. - * The default conversion for numerics is into a double. If you wish to - * override this behavior at the global level, you can set the - * `globalNumberParser` property to your own `(String => Any)` function. - * If you only want to override at the per-thread level then you can set - * the `perThreadNumberParser` property to your function. For example: - * {{{ - * val myConversionFunc = {input : String => BigDecimal(input)} - * - * // Global override - * JSON.globalNumberParser = myConversionFunc - * - * // Per-thread override - * JSON.perThreadNumberParser = myConversionFunc - * }}} - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This object will be removed.", "2.11.0") -object JSON extends Parser { - - /** - * This method converts ''raw'' results back into the original, deprecated - * form. - */ - private def unRaw (in : Any) : Any = in match { - case JSONObject(obj) => obj.map({ case (k,v) => (k,unRaw(v))}).toList - case JSONArray(list) => list.map(unRaw) - case x => x - } - - /** - * Parse the given `JSON` string and return a list of elements. If the - * string is a `JSON` object it will be a `JSONObject`. If it's a `JSON` - * array it will be a `JSONArray`. - * - * @param input the given `JSON` string. - * @return an optional `JSONType` element. 
- */ - def parseRaw(input : String) : Option[JSONType] = - phrase(root)(new lexical.Scanner(input)) match { - case Success(result, _) => Some(result) - case _ => None - } - - /** - * Parse the given `JSON` string and return either a `List[Any]` - * if the `JSON` string specifies an `Array`, or a - * `Map[String,Any]` if the `JSON` string specifies an object. - * - * @param input the given `JSON` string. - * @return an optional list or map. - */ - def parseFull(input: String): Option[Any] = - parseRaw(input) match { - case Some(data) => Some(resolveType(data)) - case None => None - } - - /** - * A utility method to resolve a parsed `JSON` list into objects or - * arrays. See the `parse` method for details. - */ - def resolveType(input: Any): Any = input match { - case JSONObject(data) => data.transform { - case (k,v) => resolveType(v) - } - case JSONArray(data) => data.map(resolveType) - case x => x - } - - /** - * The global (VM) default function for converting a string to a numeric value. - */ - def globalNumberParser_=(f: NumericParser) { defaultNumberParser = f } - def globalNumberParser : NumericParser = defaultNumberParser - - /** - * Defines the function used to convert a numeric string literal into a - * numeric format on a per-thread basis. Use `globalNumberParser` for a - * global override. 
- */ - def perThreadNumberParser_=(f : NumericParser) { numberParser.set(f) } - def perThreadNumberParser : NumericParser = numberParser.get() -} diff --git a/src/library/scala/util/parsing/json/Lexer.scala b/src/library/scala/util/parsing/json/Lexer.scala deleted file mode 100644 index 7fc4e0bab6..0000000000 --- a/src/library/scala/util/parsing/json/Lexer.scala +++ /dev/null @@ -1,90 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.json - -import scala.util.parsing.combinator._ -import scala.util.parsing.combinator.lexical._ -import scala.util.parsing.input.CharArrayReader.EofCh - -/** - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -class Lexer extends StdLexical with ImplicitConversions { - - override def token: Parser[Token] = - //( '\"' ~ rep(charSeq | letter) ~ '\"' ^^ lift(StringLit) - ( string ^^ StringLit - | number ~ letter ^^ { case n ~ l => ErrorToken("Invalid number format : " + n + l) } - | '-' ~> whitespace ~ number ~ letter ^^ { case ws ~ num ~ l => ErrorToken("Invalid number format : -" + num + l) } - | '-' ~> whitespace ~ number ^^ { case ws ~ num => NumericLit("-" + num) } - | number ^^ NumericLit - | EofCh ^^^ EOF - | delim - | '\"' ~> failure("Unterminated string") - | rep(letter) ^^ checkKeyword - | failure("Illegal character") - ) - - def checkKeyword(xs : List[Any]) = { - val strRep = xs mkString "" - if (reserved contains strRep) Keyword(strRep) else ErrorToken("Not a keyword: " + strRep) - } - - /** A string is a collection of zero or more Unicode characters, wrapped in - * double quotes, using backslash escapes (cf. http://www.json.org/). 
- */ - def string = '\"' ~> rep(charSeq | chrExcept('\"', '\n', EofCh)) <~ '\"' ^^ { _ mkString "" } - - override def whitespace = rep(whitespaceChar) - - def number = intPart ~ opt(fracPart) ~ opt(expPart) ^^ { case i ~ f ~ e => - i + optString(".", f) + optString("", e) - } - def intPart = zero | intList - def intList = nonzero ~ rep(digit) ^^ {case x ~ y => (x :: y) mkString ""} - def fracPart = '.' ~> rep(digit) ^^ { _ mkString "" } - def expPart = exponent ~ opt(sign) ~ rep1(digit) ^^ { case e ~ s ~ d => - e + optString("", s) + d.mkString("") - } - - private def optString[A](pre: String, a: Option[A]) = a match { - case Some(x) => pre + x.toString - case None => "" - } - - def zero: Parser[String] = '0' ^^^ "0" - def nonzero = elem("nonzero digit", d => d.isDigit && d != '0') - def exponent = elem("exponent character", d => d == 'e' || d == 'E') - def sign = elem("sign character", d => d == '-' || d == '+') - - def charSeq: Parser[String] = - ('\\' ~ '\"' ^^^ "\"" - |'\\' ~ '\\' ^^^ "\\" - |'\\' ~ '/' ^^^ "/" - |'\\' ~ 'b' ^^^ "\b" - |'\\' ~ 'f' ^^^ "\f" - |'\\' ~ 'n' ^^^ "\n" - |'\\' ~ 'r' ^^^ "\r" - |'\\' ~ 't' ^^^ "\t" - |'\\' ~> 'u' ~> unicodeBlock) - - val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray - def hexDigit = elem("hex digit", hexDigits.contains(_)) - - private def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ { - case a ~ b ~ c ~ d => - new String(Array(Integer.parseInt(List(a, b, c, d) mkString "", 16)), 0, 1) - } - - //private def lift[T](f: String => T)(xs: List[Any]): T = f(xs mkString "") -} diff --git a/src/library/scala/util/parsing/json/Parser.scala b/src/library/scala/util/parsing/json/Parser.scala deleted file mode 100644 index 521dfc6612..0000000000 --- a/src/library/scala/util/parsing/json/Parser.scala +++ /dev/null @@ -1,147 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** 
/____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.json - -import scala.util.parsing.combinator._ -import scala.util.parsing.combinator.syntactical._ - -/** - * A marker class for the JSON result types. - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -sealed abstract class JSONType { - /** - * This version of toString allows you to provide your own value - * formatter. - */ - def toString (formatter : JSONFormat.ValueFormatter) : String - - /** - * Returns a String representation of this JSON value - * using the JSONFormat.defaultFormatter. - */ - override def toString = toString(JSONFormat.defaultFormatter) -} - -/** - * This object defines functions that are used when converting JSONType - * values into String representations. Mostly this is concerned with - * proper quoting of strings. - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This object will be removed.", "2.11.0") -object JSONFormat { - /** - * This type defines a function that can be used to - * format values into JSON format. - */ - type ValueFormatter = Any => String - - /** - * The default formatter used by the library. You can - * provide your own with the toString calls on - * JSONObject and JSONArray instances. - */ - val defaultFormatter : ValueFormatter = (x : Any) => x match { - case s : String => "\"" + quoteString(s) + "\"" - case jo : JSONObject => jo.toString(defaultFormatter) - case ja : JSONArray => ja.toString(defaultFormatter) - case other => other.toString - } - - /** - * This function can be used to properly quote Strings - * for JSON output. - */ - def quoteString (s : String) : String = - s.map { - case '"' => "\\\"" - case '\\' => "\\\\" - case '/' => "\\/" - case '\b' => "\\b" - case '\f' => "\\f" - case '\n' => "\\n" - case '\r' => "\\r" - case '\t' => "\\t" - /* We'll unicode escape any control characters. 
These include: - * 0x0 -> 0x1f : ASCII Control (C0 Control Codes) - * 0x7f : ASCII DELETE - * 0x80 -> 0x9f : C1 Control Codes - * - * Per RFC4627, section 2.5, we're not technically required to - * encode the C1 codes, but we do to be safe. - */ - case c if ((c >= '\u0000' && c <= '\u001f') || (c >= '\u007f' && c <= '\u009f')) => "\\u%04x".format(c.toInt) - case c => c - }.mkString -} - -/** - * Represents a JSON Object (map). - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -case class JSONObject (obj : Map[String,Any]) extends JSONType { - def toString (formatter : JSONFormat.ValueFormatter) = - "{" + obj.map({ case (k,v) => formatter(k.toString) + " : " + formatter(v) }).mkString(", ") + "}" -} - -/** - * Represents a JSON Array (list). - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -case class JSONArray (list : List[Any]) extends JSONType { - def toString (formatter : JSONFormat.ValueFormatter) = - "[" + list.map(formatter).mkString(", ") + "]" -} - -/** - * The main JSON Parser. 
- * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -class Parser extends StdTokenParsers with ImplicitConversions { - // Fill in abstract defs - type Tokens = Lexer - val lexical = new Tokens - - // Configure lexical parsing - lexical.reserved ++= List("true", "false", "null") - lexical.delimiters ++= List("{", "}", "[", "]", ":", ",") - - /** Type signature for functions that can parse numeric literals */ - type NumericParser = String => Any - - // Global default number parsing function - protected var defaultNumberParser : NumericParser = {_.toDouble} - - // Per-thread default number parsing function - protected val numberParser = new ThreadLocal[NumericParser]() { - override def initialValue() = defaultNumberParser - } - - // Define the grammar - def root = jsonObj | jsonArray - def jsonObj = "{" ~> repsep(objEntry, ",") <~ "}" ^^ { case vals : List[_] => JSONObject(Map(vals : _*)) } - def jsonArray = "[" ~> repsep(value, ",") <~ "]" ^^ { case vals : List[_] => JSONArray(vals) } - def objEntry = stringVal ~ (":" ~> value) ^^ { case x ~ y => (x, y) } - def value: Parser[Any] = (jsonObj | jsonArray | number | "true" ^^^ true | "false" ^^^ false | "null" ^^^ null | stringVal) - def stringVal = accept("string", { case lexical.StringLit(n) => n} ) - def number = accept("number", { case lexical.NumericLit(n) => numberParser.get.apply(n)} ) -} - diff --git a/src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala b/src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala new file mode 100644 index 0000000000..0683ea927d --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala @@ -0,0 +1,43 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala 
package util.parsing.combinator

import scala.language.implicitConversions

/** This object contains implicit conversions that come in handy when using the `^^` combinator.
 *
 *  Refer to [[scala.util.parsing.combinator.Parsers]] to construct an AST from the concrete syntax.
 *
 *  The reason for this is that the sequential composition combinator (`~`) combines its constituents
 *  into a ~. When several `~`s are combined, this results in nested `~`s (to the left).
 *  The `flatten*` coercions makes it easy to apply an `n`-argument function to a nested `~` of
 *  depth `n-1`
 *
 *  The `headOptionTailToFunList` converts a function that takes a `List[A]` to a function that
 *  accepts a `~[A, Option[List[A]]]` (this happens when parsing something of the following
 *  shape: `p ~ opt("." ~ repsep(p, "."))` -- where `p` is a parser that yields an `A`)
 *
 *  @author Martin Odersky
 *  @author Iulian Dragos
 *  @author Adriaan Moors
 */
trait ImplicitConversions { self: Parsers =>
  // NOTE(review): explicit result types added to every implicit definition.
  // Implicit defs with inferred result types are fragile for implicit search,
  // warned against since Scala 2.13, and an error in Scala 3. No behavior change.

  /** Lifts a 2-argument function to a function over a nested `~` of depth 1. */
  implicit def flatten2[A, B, C] (f: (A, B) => C): ~[A, B] => C =
    (p: ~[A, B]) => p match {case a ~ b => f(a, b)}
  /** Lifts a 3-argument function to a function over a nested `~` of depth 2. */
  implicit def flatten3[A, B, C, D] (f: (A, B, C) => D): ~[~[A, B], C] => D =
    (p: ~[~[A, B], C]) => p match {case a ~ b ~ c => f(a, b, c)}
  /** Lifts a 4-argument function to a function over a nested `~` of depth 3. */
  implicit def flatten4[A, B, C, D, E] (f: (A, B, C, D) => E): ~[~[~[A, B], C], D] => E =
    (p: ~[~[~[A, B], C], D]) => p match {case a ~ b ~ c ~ d => f(a, b, c, d)}
  /** Lifts a 5-argument function to a function over a nested `~` of depth 4. */
  implicit def flatten5[A, B, C, D, E, F](f: (A, B, C, D, E) => F): ~[~[~[~[A, B], C], D], E] => F =
    (p: ~[~[~[~[A, B], C], D], E]) => p match {case a ~ b ~ c ~ d ~ e => f(a, b, c, d, e)}
  /** Converts a function over a `List[A]` into a function over the result of
   *  `p ~ opt(... repsep(p, ...))`-shaped parsers: head element plus optional tail list.
   */
  implicit def headOptionTailToFunList[A, T] (f: List[A] => T): ~[A, Option[List[A]]] => T =
    (p: ~[A, Option[List[A]]]) => f(p._1 :: p._2.getOrElse(Nil))  // getOrElse(Nil) replaces the equivalent Some/None match
}
package scala
package util.parsing.combinator

import scala.annotation.migration

/** `JavaTokenParsers` differs from [[scala.util.parsing.combinator.RegexParsers]]
 *  by adding the following definitions:
 *
 *  - `ident`
 *  - `wholeNumber`
 *  - `decimalNumber`
 *  - `stringLiteral`
 *  - `floatingPointNumber`
 */
trait JavaTokenParsers extends RegexParsers {
  /** Anything that is a valid Java identifier, according to
   * The Java Language Spec.
   * Generally, this means a letter, followed by zero or more letters or numbers.
   */
  def ident: Parser[String] =
    """\p{javaJavaIdentifierStart}\p{javaJavaIdentifierPart}*""".r
  /** An integer, without sign or with a negative sign. */
  def wholeNumber: Parser[String] =
    """-?\d+""".r
  /** Number following one of these rules:
   *
   *  - An integer. For example: `13`
   *  - An integer followed by a decimal point. For example: `3.`
   *  - An integer followed by a decimal point and fractional part. For example: `3.14`
   *  - A decimal point followed by a fractional part. For example: `.1`
   */
  def decimalNumber: Parser[String] =
    """(\d+(\.\d*)?|\d*\.\d+)""".r
  /** Double quotes (`"`) enclosing a sequence of:
   *
   *  - Any character except double quotes, control characters or backslash (`\`)
   *  - A backslash followed by another backslash, a single or double quote, or one
   *    of the letters `b`, `f`, `n`, `r` or `t`
   *  - `\` followed by `u` followed by four hexadecimal digits
   */
  @migration("`stringLiteral` allows escaping single and double quotes, but not forward slashes any longer.", "2.10.0")
  def stringLiteral: Parser[String] =
    ("\""+"""([^"\p{Cntrl}\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*"""+"\"").r
  /** A number following the rules of `decimalNumber`, with the following
   *  optional additions:
   *
   *  - Preceded by a negative sign
   *  - Followed by `e` or `E` and an optionally signed integer
   *  - Followed by `f`, `F`, `d` or `D` (after the above rule, if both are used)
   */
  // (doc fix: the suffix list previously read "`f`, `f`" -- the regex accepts [fFdD])
  def floatingPointNumber: Parser[String] =
    """-?(\d+(\.\d*)?|\d*\.\d+)([eE][+-]?\d+)?[fFdD]?""".r
}

// --- file boundary: src/parser-combinators/scala/util/parsing/combinator/PackratParsers.scala ---

package scala
package util.parsing.combinator

import scala.util.parsing.input.{ Reader, Position }
import scala.collection.mutable
import scala.language.implicitConversions

/**
 * `PackratParsers` is a component that extends the parser combinators
 * provided by [[scala.util.parsing.combinator.Parsers]] with a memoization
 * facility (''Packrat Parsing'').
 *
 * Packrat Parsing is a technique for implementing backtracking,
 * recursive-descent parsers, with the advantage that it guarantees
 * unlimited lookahead and a linear parse time. Using this technique,
 * left recursive grammars can also be accepted.
 *
 * Using `PackratParsers` is very similar to using `Parsers`:
 *  - any class/trait that extends `Parsers` (directly or through a subclass)
 *    can mix in `PackratParsers`.
 *    Example: `'''object''' MyGrammar '''extends''' StandardTokenParsers '''with''' PackratParsers`
 *  - each grammar production previously declared as a `def` without formal
 *    parameters becomes a `lazy val`, and its type is changed from
 *    `Parser[Elem]` to `PackratParser[Elem]`.
 *    So, for example, `'''def''' production: Parser[Int] = {...}`
 *    becomes `'''lazy val''' production: PackratParser[Int] = {...}`
 *  - Important: using `PackratParser`s is not an ''all or nothing'' decision.
 *    They can be freely mixed with regular `Parser`s in a single grammar.
 *
 * Cached parse results are attached to the ''input'', not the grammar.
 * Therefore, `PackratParser`s require a `PackratReader` as input, which
 * adds memoization to an underlying `Reader`. Programmers can create
 * `PackratReader` objects either manually, as in
 * `production('''new''' PackratReader('''new''' lexical.Scanner("input")))`,
 * but the common way should be to rely on the combinator `phrase` to wrap
 * a given input with a `PackratReader` if the input is not one itself.
 *
 * @see Bryan Ford: "Packrat Parsing: Simple, Powerful, Lazy, Linear Time." ICFP'02
 * @see Alessandro Warth, James R. Douglass, Todd Millstein: "Packrat Parsers Can Support Left Recursion." PEPM'08
 *
 * @since 2.8
 * @author Manohar Jonnalagedda
 * @author Tiark Rompf
 */

trait PackratParsers extends Parsers {

  //type Input = PackratReader[Elem]

  /**
   * A specialized `Reader` class that wraps an underlying `Reader`
   * and provides memoization of parse results.
   */
  class PackratReader[+T](underlying: Reader[T]) extends Reader[T] { outer =>

    /*
     * caching of intermediate parse results and information about recursion
     */
    private[PackratParsers] val cache = mutable.HashMap.empty[(Parser[_], Position), MemoEntry[_]]

    private[PackratParsers] def getFromCache[T](p: Parser[T]): Option[MemoEntry[T]] = {
      cache.get((p, pos)).asInstanceOf[Option[MemoEntry[T]]]
    }

    private[PackratParsers] def updateCacheAndGet[T](p: Parser[T], w: MemoEntry[T]): MemoEntry[T] = {
      cache.put((p, pos),w)
      w
    }

    /* a cache for storing parser heads: allows to know which parser is involved
       in a recursion*/
    private[PackratParsers] val recursionHeads: mutable.HashMap[Position, Head] = mutable.HashMap.empty

    //a stack that keeps a list of all involved rules
    private[PackratParsers] var lrStack: List[LR] = Nil

    override def source: java.lang.CharSequence = underlying.source
    override def offset: Int = underlying.offset

    def first: T = underlying.first
    def rest: Reader[T] = new PackratReader(underlying.rest) {
      override private[PackratParsers] val cache = outer.cache
      override private[PackratParsers] val recursionHeads = outer.recursionHeads
      lrStack = outer.lrStack
    }

    def pos: Position = underlying.pos
    def atEnd: Boolean = underlying.atEnd
  }

  /**
   * A parser generator delimiting whole phrases (i.e. programs).
   *
   * Overridden to make sure any input passed to the argument parser
   * is wrapped in a `PackratReader`.
   */
  override def phrase[T](p: Parser[T]) = {
    val q = super.phrase(p)
    new PackratParser[T] {
      def apply(in: Input) = in match {
        case in: PackratReader[_] => q(in)
        case in => q(new PackratReader(in))
      }
    }
  }

  private def getPosFromResult(r: ParseResult[_]): Position = r.next.pos

  // auxiliary data structures

  private case class MemoEntry[+T](var r: Either[LR,ParseResult[_]]){
    def getResult: ParseResult[T] = r match {
      case Left(LR(res,_,_)) => res.asInstanceOf[ParseResult[T]]
      case Right(res) => res.asInstanceOf[ParseResult[T]]
    }
  }

  private case class LR(var seed: ParseResult[_], var rule: Parser[_], var head: Option[Head]){
    def getPos: Position = getPosFromResult(seed)
  }

  private case class Head(var headParser: Parser[_], var involvedSet: List[Parser[_]], var evalSet: List[Parser[_]]){
    def getHead = headParser
  }

  /**
   * The root class of packrat parsers.
   */
  abstract class PackratParser[+T] extends super.Parser[T]

  /**
   * Implicitly convert a parser to a packrat parser.
   * The conversion is triggered by giving the appropriate target type:
   * {{{
   *   val myParser: PackratParser[MyResult] = aParser
   * }}} */
  implicit def parser2packrat[T](p: => super.Parser[T]): PackratParser[T] = {
    lazy val q = p
    memo(super.Parser {in => q(in)})
  }

  /*
   * An unspecified function that is called when a packrat reader is applied.
   * It verifies whether we are in the process of growing a parse or not.
   * In the former case, it makes sure that rules involved in the recursion are evaluated.
   * It also prevents non-involved rules from getting evaluated further
   */
  private def recall(p: super.Parser[_], in: PackratReader[Elem]): Option[MemoEntry[_]] = {
    val cached = in.getFromCache(p)
    val head = in.recursionHeads.get(in.pos)

    head match {
      case None => /*no heads*/ cached
      case Some(h@Head(hp, involved, evalSet)) => {
        //heads found
        if(cached == None && !(hp::involved contains p)) {
          //Nothing in the cache, and p is not involved
          return Some(MemoEntry(Right(Failure("dummy ",in))))
        }
        if(evalSet contains p){
          //something in cache, and p is in the evalSet
          //remove the rule from the evalSet of the Head
          h.evalSet = h.evalSet.filterNot(_==p)
          val tempRes = p(in)
          //we know that cached has an entry here
          val tempEntry: MemoEntry[_] = cached.get // match {case Some(x: MemoEntry[_]) => x}
          //cache is modified
          tempEntry.r = Right(tempRes)
        }
        cached
      }
    }
  }

  /*
   * setting up the left-recursion. We have the LR for the rule head
   * we modify the involvedSets of all LRs in the stack, till we see
   * the current parser again
   */
  private def setupLR(p: Parser[_], in: PackratReader[_], recDetect: LR): Unit = {
    if(recDetect.head == None) recDetect.head = Some(Head(p, Nil, Nil))

    in.lrStack.takeWhile(_.rule != p).foreach {x =>
      x.head = recDetect.head
      recDetect.head.map(h => h.involvedSet = x.rule::h.involvedSet)
    }
  }

  /*
   * growing, if needed the recursion
   * check whether the parser we are growing is the head of the rule.
   * Not => no grow
   */

  /*
   * Once the result of the recall function is known, if it is nil, then we need to store a dummy
failure into the cache (much like in the previous listings) and compute the future parse. If it
is not, however, this means we have detected a recursion, and we use the setupLR function
to update each parser involved in the recursion.
   */

  private def lrAnswer[T](p: Parser[T], in: PackratReader[Elem], growable: LR): ParseResult[T] = growable match {
    //growable will always be having a head, we can't enter lrAnswer otherwise
    case LR(seed ,rule, Some(head)) =>
      if(head.getHead != p) /*not head rule, so not growing*/ seed.asInstanceOf[ParseResult[T]]
      else {
        in.updateCacheAndGet(p, MemoEntry(Right[LR, ParseResult[T]](seed.asInstanceOf[ParseResult[T]])))
        seed match {
          case f@Failure(_,_) => f
          case e@Error(_,_) => e
          case s@Success(_,_) => /*growing*/ grow(p, in, head)
        }
      }
    case _=> throw new Exception("lrAnswer with no head !!")
  }

  //p here should be strict (cannot be non-strict) !!
  //failing left-recursive grammars: This is done by simply storing a failure if nothing is found

  /**
   * Explicitly convert a given parser to a memoizing packrat parser.
   * In most cases, client code should avoid calling `memo` directly
   * and rely on implicit conversion instead.
   */
  def memo[T](p: super.Parser[T]): PackratParser[T] = {
    new PackratParser[T] {
      def apply(in: Input) = {
        /*
         * transformed reader
         */
        val inMem = in.asInstanceOf[PackratReader[Elem]]

        //look in the global cache if in a recursion
        val m = recall(p, inMem)
        m match {
          //nothing has been done due to recall
          case None =>
            val base = LR(Failure("Base Failure",in), p, None)
            inMem.lrStack = base::inMem.lrStack
            //cache base result
            inMem.updateCacheAndGet(p,MemoEntry(Left(base)))
            //parse the input
            val tempRes = p(in)
            //the base variable has passed equality tests with the cache
            inMem.lrStack = inMem.lrStack.tail
            //check whether base has changed, if yes, we will have a head
            base.head match {
              case None =>
                /*simple result*/
                inMem.updateCacheAndGet(p,MemoEntry(Right(tempRes)))
                tempRes
              case s@Some(_) =>
                /*non simple result*/
                base.seed = tempRes
                //the base variable has passed equality tests with the cache
                val res = lrAnswer(p, inMem, base)
                res
            }

          case Some(mEntry) => {
            //entry found in cache
            mEntry match {
              case MemoEntry(Left(recDetect)) => {
                setupLR(p, inMem, recDetect)
                //all setupLR does is change the heads of the recursions, so the seed will stay the same
                recDetect match {case LR(seed, _, _) => seed.asInstanceOf[ParseResult[T]]}
              }
              case MemoEntry(Right(res: ParseResult[_])) => res.asInstanceOf[ParseResult[T]]
            }
          }
        }
      }
    }
  }

  private def grow[T](p: super.Parser[T], rest: PackratReader[Elem], head: Head): ParseResult[T] = {
    //store the head into the recursionHeads
    rest.recursionHeads.put(rest.pos, head /*match {case Head(hp,involved,_) => Head(hp,involved,involved)}*/)
    val oldRes: ParseResult[T] = rest.getFromCache(p).get match {
      case MemoEntry(Right(x)) => x.asInstanceOf[ParseResult[T]]
      case _ => throw new Exception("impossible match")
    }

    //resetting the evalSet of the head of the recursion at each beginning of growth
    head.evalSet = head.involvedSet
    val tempRes = p(rest); tempRes match {
      case s@Success(_,_) =>
        if(getPosFromResult(oldRes) < getPosFromResult(tempRes)) {
          rest.updateCacheAndGet(p, MemoEntry(Right(s)))
          grow(p, rest, head)
        } else {
          //we're done with growing, we can remove data from recursion head
          rest.recursionHeads -= rest.pos
          rest.getFromCache(p).get match {
            case MemoEntry(Right(x: ParseResult[_])) => x.asInstanceOf[ParseResult[T]]
            case _ => throw new Exception("impossible match")
          }
        }
      case f =>
        rest.recursionHeads -= rest.pos
        /*rest.updateCacheAndGet(p, MemoEntry(Right(f)));*/oldRes
    }
  }
}

// --- file boundary: src/parser-combinators/scala/util/parsing/combinator/Parsers.scala ---

/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2006-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    
http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.combinator + +import scala.util.parsing.input._ +import scala.collection.mutable.ListBuffer +import scala.annotation.tailrec +import scala.annotation.migration +import scala.language.implicitConversions +import scala.util.DynamicVariable + +// TODO: better error handling (labelling like parsec's ) + +/** `Parsers` is a component that ''provides'' generic parser combinators. + * + * There are two abstract members that must be defined in order to + * produce parsers: the type `Elem` and + * [[scala.util.parsing.combinator.Parsers.Parser]]. There are helper + * methods that produce concrete `Parser` implementations -- see ''primitive + * parser'' below. + * + * A `Parsers` may define multiple `Parser` instances, which are combined + * to produced the desired parser. + * + * The type of the elements these parsers should parse must be defined + * by declaring `Elem` + * (each parser is polymorphic in the type of result it produces). + * + * There are two aspects to the result of a parser: + * 1. success or failure + * 1. the result. + * + * A [[scala.util.parsing.combinator.Parsers.Parser]] produces both kinds of information, + * by returning a [[scala.util.parsing.combinator.Parsers.ParseResult]] when its `apply` + * method is called on an input. + * + * The term ''parser combinator'' refers to the fact that these parsers + * are constructed from primitive parsers and composition operators, such + * as sequencing, alternation, optionality, repetition, lifting, and so on. For example, + * given `p1` and `p2` of type [[scala.util.parsing.combinator.Parsers.Parser]]: + * + * {{{ + * p1 ~ p2 // sequencing: must match p1 followed by p2 + * p1 | p2 // alternation: must match either p1 or p2, with preference given to p1 + * p1.? 
// optionality: may match p1 or not + * p1.* // repetition: matches any number of repetitions of p1 + * }}} + * + * These combinators are provided as methods on [[scala.util.parsing.combinator.Parsers.Parser]], + * or as methods taking one or more `Parsers` and returning a `Parser` provided in + * this class. + * + * A ''primitive parser'' is a parser that accepts or rejects a single + * piece of input, based on a certain criterion, such as whether the + * input... + * - is equal to some given object (see method `accept`), + * - satisfies a certain predicate (see method `acceptIf`), + * - is in the domain of a given partial function (see method `acceptMatch`) + * - or other conditions, by using one of the other methods available, or subclassing `Parser` + * + * Even more primitive parsers always produce the same result, irrespective of the input. See + * methods `success`, `err` and `failure` as examples. + * + * @see [[scala.util.parsing.combinator.RegexParsers]] and other known subclasses for practical examples. + * + * @author Martin Odersky + * @author Iulian Dragos + * @author Adriaan Moors + */ +trait Parsers { + /** the type of input elements the provided parsers consume (When consuming + * invidual characters, a parser is typically called a ''scanner'', which + * produces ''tokens'' that are consumed by what is normally called a ''parser''. + * Nonetheless, the same principles apply, regardless of the input type.) */ + type Elem + + /** The parser input is an abstract reader of input elements, i.e. the type + * of input the parsers in this component expect. */ + type Input = Reader[Elem] + + /** A base class for parser results. A result is either successful or not + * (failure may be fatal, i.e., an Error, or not, i.e., a Failure). On + * success, provides a result of type `T` which consists of some result + * (and the rest of the input). */ + sealed abstract class ParseResult[+T] { + /** Functional composition of ParseResults. 
+ * + * @param f the function to be lifted over this result + * @return `f` applied to the result of this `ParseResult`, packaged up as a new `ParseResult` + */ + def map[U](f: T => U): ParseResult[U] + + /** Partial functional composition of ParseResults. + * + * @param f the partial function to be lifted over this result + * @param error a function that takes the same argument as `f` and + * produces an error message to explain why `f` wasn't applicable + * (it is called when this is the case) + * @return if `f` f is defined at the result in this `ParseResult`, `f` + * applied to the result of this `ParseResult`, packaged up as + * a new `ParseResult`. If `f` is not defined, `Failure`. + */ + def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U] + + def flatMapWithNext[U](f: T => Input => ParseResult[U]): ParseResult[U] + + def filterWithError(p: T => Boolean, error: T => String, position: Input): ParseResult[T] + + def append[U >: T](a: => ParseResult[U]): ParseResult[U] + + def isEmpty = !successful + + /** Returns the embedded result. */ + def get: T + + def getOrElse[B >: T](default: => B): B = + if (isEmpty) default else this.get + + val next: Input + + val successful: Boolean + } + + /** The success case of `ParseResult`: contains the result and the remaining input. 
+ * + * @param result The parser's output + * @param next The parser's remaining input + */ + case class Success[+T](result: T, override val next: Input) extends ParseResult[T] { + def map[U](f: T => U) = Success(f(result), next) + def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U] + = if(f.isDefinedAt(result)) Success(f(result), next) + else Failure(error(result), next) + + def flatMapWithNext[U](f: T => Input => ParseResult[U]): ParseResult[U] + = f(result)(next) + + def filterWithError(p: T => Boolean, error: T => String, position: Input): ParseResult[T] = + if (p(result)) this + else Failure(error(result), position) + + def append[U >: T](a: => ParseResult[U]): ParseResult[U] = this + + def get: T = result + + /** The toString method of a Success. */ + override def toString = "["+next.pos+"] parsed: "+result + + val successful = true + } + + private lazy val lastNoSuccessVar = new DynamicVariable[Option[NoSuccess]](None) + + /** A common super-class for unsuccessful parse results. */ + sealed abstract class NoSuccess(val msg: String, override val next: Input) extends ParseResult[Nothing] { // when we don't care about the difference between Failure and Error + val successful = false + + if (lastNoSuccessVar.value forall (v => !(next.pos < v.next.pos))) + lastNoSuccessVar.value = Some(this) + + def map[U](f: Nothing => U) = this + def mapPartial[U](f: PartialFunction[Nothing, U], error: Nothing => String): ParseResult[U] = this + + def flatMapWithNext[U](f: Nothing => Input => ParseResult[U]): ParseResult[U] + = this + + def filterWithError(p: Nothing => Boolean, error: Nothing => String, position: Input): ParseResult[Nothing] = this + + def get: Nothing = scala.sys.error("No result when parsing failed") + } + /** An extractor so `NoSuccess(msg, next)` can be used in matches. 
*/ + object NoSuccess { + def unapply[T](x: ParseResult[T]) = x match { + case Failure(msg, next) => Some((msg, next)) + case Error(msg, next) => Some((msg, next)) + case _ => None + } + } + + /** The failure case of `ParseResult`: contains an error-message and the remaining input. + * Parsing will back-track when a failure occurs. + * + * @param msg An error message string describing the failure. + * @param next The parser's unconsumed input at the point where the failure occurred. + */ + case class Failure(override val msg: String, override val next: Input) extends NoSuccess(msg, next) { + /** The toString method of a Failure yields an error message. */ + override def toString = "["+next.pos+"] failure: "+msg+"\n\n"+next.pos.longString + + def append[U >: Nothing](a: => ParseResult[U]): ParseResult[U] = { val alt = a; alt match { + case Success(_, _) => alt + case ns: NoSuccess => if (alt.next.pos < next.pos) this else alt + }} + } + + /** The fatal failure case of ParseResult: contains an error-message and + * the remaining input. + * No back-tracking is done when a parser returns an `Error`. + * + * @param msg An error message string describing the error. + * @param next The parser's unconsumed input at the point where the error occurred. + */ + case class Error(override val msg: String, override val next: Input) extends NoSuccess(msg, next) { + /** The toString method of an Error yields an error message. */ + override def toString = "["+next.pos+"] error: "+msg+"\n\n"+next.pos.longString + def append[U >: Nothing](a: => ParseResult[U]): ParseResult[U] = this + } + + def Parser[T](f: Input => ParseResult[T]): Parser[T] + = new Parser[T]{ def apply(in: Input) = f(in) } + + def OnceParser[T](f: Input => ParseResult[T]): Parser[T] with OnceParser[T] + = new Parser[T] with OnceParser[T] { def apply(in: Input) = f(in) } + + /** The root class of parsers. + * Parsers are functions from the Input type to ParseResult. 
+ */ + abstract class Parser[+T] extends (Input => ParseResult[T]) { + private var name: String = "" + def named(n: String): this.type = {name=n; this} + override def toString() = "Parser ("+ name +")" + + /** An unspecified method that defines the behaviour of this parser. */ + def apply(in: Input): ParseResult[T] + + def flatMap[U](f: T => Parser[U]): Parser[U] + = Parser{ in => this(in) flatMapWithNext(f)} + + def map[U](f: T => U): Parser[U] //= flatMap{x => success(f(x))} + = Parser{ in => this(in) map(f)} + + def filter(p: T => Boolean): Parser[T] + = withFilter(p) + + def withFilter(p: T => Boolean): Parser[T] + = Parser{ in => this(in) filterWithError(p, "Input doesn't match filter: "+_, in)} + + // no filter yet, dealing with zero is tricky! + + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def append[U >: T](p0: => Parser[U]): Parser[U] = { lazy val p = p0 // lazy argument + Parser{ in => this(in) append p(in)} + } + + // the operator formerly known as +++, ++, &, but now, behold the venerable ~ + // it's short, light (looks like whitespace), has few overloaded meaning (thanks to the recent change from ~ to unary_~) + // and we love it! (or do we like `,` better?) + + /** A parser combinator for sequential composition. + * + * `p ~ q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`. + * + * @param q a parser that will be executed after `p` (this parser) + * succeeds -- evaluated at most once, and only when necessary. + * @return a `Parser` that -- on success -- returns a `~` (like a `Pair`, + * but easier to pattern match on) that contains the result of `p` and + * that of `q`. The resulting parser fails if either `p` or `q` fails. 
+ */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ~ [U](q: => Parser[U]): Parser[~[T, U]] = { lazy val p = q // lazy argument + (for(a <- this; b <- p) yield new ~(a,b)).named("~") + } + + /** A parser combinator for sequential composition which keeps only the right result. + * + * `p ~> q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`. + * + * @param q a parser that will be executed after `p` (this parser) + * succeeds -- evaluated at most once, and only when necessary. + * @return a `Parser` that -- on success -- returns the result of `q`. + */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ~> [U](q: => Parser[U]): Parser[U] = { lazy val p = q // lazy argument + (for(a <- this; b <- p) yield b).named("~>") + } + + /** A parser combinator for sequential composition which keeps only the left result. + * + * `p <~ q` succeeds if `p` succeeds and `q` succeeds on the input + * left over by `p`. + * + * @note <~ has lower operator precedence than ~ or ~>. + * + * @param q a parser that will be executed after `p` (this parser) succeeds -- evaluated at most once, and only when necessary + * @return a `Parser` that -- on success -- returns the result of `p`. 
+ */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def <~ [U](q: => Parser[U]): Parser[T] = { lazy val p = q // lazy argument + (for(a <- this; b <- p) yield a).named("<~") + } + + /* not really useful: V cannot be inferred because Parser is covariant in first type parameter (V is always trivially Nothing) + def ~~ [U, V](q: => Parser[U])(implicit combine: (T, U) => V): Parser[V] = new Parser[V] { + def apply(in: Input) = seq(Parser.this, q)((x, y) => combine(x,y))(in) + } */ + + /** A parser combinator for non-back-tracking sequential composition. + * + * `p ~! q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`. + * In case of failure, no back-tracking is performed (in an earlier parser produced by the `|` combinator). + * + * @param p a parser that will be executed after `p` (this parser) succeeds + * @return a `Parser` that -- on success -- returns a `~` (like a Pair, but easier to pattern match on) + * that contains the result of `p` and that of `q`. + * The resulting parser fails if either `p` or `q` fails, this failure is fatal. + */ + def ~! [U](p: => Parser[U]): Parser[~[T, U]] + = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~!") } + + /** A parser combinator for alternative composition. + * + * `p | q` succeeds if `p` succeeds or `q` succeeds. + * Note that `q` is only tried if `p`s failure is non-fatal (i.e., back-tracking is allowed). + * + * @param q a parser that will be executed if `p` (this parser) fails (and allows back-tracking) + * @return a `Parser` that returns the result of the first parser to succeed (out of `p` and `q`) + * The resulting parser succeeds if (and only if) + * - `p` succeeds, ''or'' + * - if `p` fails allowing back-tracking and `q` succeeds. 
+ */ + def | [U >: T](q: => Parser[U]): Parser[U] = append(q).named("|") + + // TODO + /** A parser combinator for alternative with longest match composition. + * + * `p ||| q` succeeds if `p` succeeds or `q` succeeds. + * If `p` and `q` both succeed, the parser that consumed the most characters accepts. + * + * @param q0 a parser that accepts if p consumes less characters. -- evaluated at most once, and only when necessary + * @return a `Parser` that returns the result of the parser consuming the most characters (out of `p` and `q`). + */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ||| [U >: T](q0: => Parser[U]): Parser[U] = new Parser[U] { + lazy val q = q0 // lazy argument + def apply(in: Input) = { + val res1 = Parser.this(in) + val res2 = q(in) + + (res1, res2) match { + case (s1 @ Success(_, next1), s2 @ Success(_, next2)) => if (next2.pos < next1.pos) s1 else s2 + case (s1 @ Success(_, _), _) => s1 + case (_, s2 @ Success(_, _)) => s2 + case (e1 @ Error(_, _), _) => e1 + case (f1 @ Failure(_, next1), ns2 @ NoSuccess(_, next2)) => if (next2.pos < next1.pos) f1 else ns2 + } + } + override def toString = "|||" + } + + /** A parser combinator for function application. + * + * `p ^^ f` succeeds if `p` succeeds; it returns `f` applied to the result of `p`. + * + * @param f a function that will be applied to this parser's result (see `map` in `ParseResult`). + * @return a parser that has the same behaviour as the current parser, but whose result is + * transformed by `f`. + */ + def ^^ [U](f: T => U): Parser[U] = map(f).named(toString+"^^") + + /** A parser combinator that changes a successful result into the specified value. + * + * `p ^^^ v` succeeds if `p` succeeds; discards its result, and returns `v` instead. + * + * @param v The new result for the parser, evaluated at most once (if `p` succeeds), not evaluated at all if `p` fails. 
+ * @return a parser that has the same behaviour as the current parser, but whose successful result is `v` + */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ^^^ [U](v: => U): Parser[U] = new Parser[U] { + lazy val v0 = v // lazy argument + def apply(in: Input) = Parser.this(in) map (x => v0) + }.named(toString+"^^^") + + /** A parser combinator for partial function application. + * + * `p ^? (f, error)` succeeds if `p` succeeds AND `f` is defined at the result of `p`; + * in that case, it returns `f` applied to the result of `p`. If `f` is not applicable, + * error(the result of `p`) should explain why. + * + * @param f a partial function that will be applied to this parser's result + * (see `mapPartial` in `ParseResult`). + * @param error a function that takes the same argument as `f` and produces an error message + * to explain why `f` wasn't applicable + * @return a parser that succeeds if the current parser succeeds and `f` is applicable + * to the result. If so, the result will be transformed by `f`. + */ + def ^? [U](f: PartialFunction[T, U], error: T => String): Parser[U] = Parser{ in => + this(in).mapPartial(f, error)}.named(toString+"^?") + + /** A parser combinator for partial function application. + * + * `p ^? f` succeeds if `p` succeeds AND `f` is defined at the result of `p`; + * in that case, it returns `f` applied to the result of `p`. + * + * @param f a partial function that will be applied to this parser's result + * (see `mapPartial` in `ParseResult`). + * @return a parser that succeeds if the current parser succeeds and `f` is applicable + * to the result. If so, the result will be transformed by `f`. + */ + def ^? [U](f: PartialFunction[T, U]): Parser[U] = ^?(f, r => "Constructor function not defined at "+r) + + /** A parser combinator that parameterizes a subsequent parser with the + * result of this one. 
+ * + * Use this combinator when a parser depends on the result of a previous + * parser. `p` should be a function that takes the result from the first + * parser and returns the second parser. + * + * `p into fq` (with `fq` typically `{x => q}`) first applies `p`, and + * then, if `p` successfully returned result `r`, applies `fq(r)` to the + * rest of the input. + * + * ''From: G. Hutton. Higher-order functions for parsing. J. Funct. Program., 2(3):323--343, 1992.'' + * + * @example {{{ + * def perlRE = "m" ~> (".".r into (separator => """[^%s]*""".format(separator).r <~ separator)) + * }}} + * + * @param fq a function that, given the result from this parser, returns + * the second parser to be applied + * @return a parser that succeeds if this parser succeeds (with result `x`) + * and if then `fq(x)` succeeds + */ + def into[U](fq: T => Parser[U]): Parser[U] = flatMap(fq) + + // shortcuts for combinators: + + /** Returns `into(fq)`. */ + def >>[U](fq: T => Parser[U])=into(fq) + + /** Returns a parser that repeatedly parses what this parser parses. + * + * @return rep(this) + */ + def * = rep(this) + + /** Returns a parser that repeatedly parses what this parser parses, + * interleaved with the `sep` parser. The `sep` parser specifies how + * the results parsed by this parser should be combined. + * + * @return chainl1(this, sep) + */ + def *[U >: T](sep: => Parser[(U, U) => U]) = chainl1(this, sep) + + // TODO: improve precedence? a ~ b*(",") = a ~ (b*(",")) should be true + + /** Returns a parser that repeatedly (at least once) parses what this parser parses. + * + * @return rep1(this) + */ + def + = rep1(this) + + /** Returns a parser that optionally parses what this parser parses. + * + * @return opt(this) + */ + def ? = opt(this) + + /** Changes the failure message produced by a parser. + * + * This doesn't change the behavior of a parser on neither + * success nor error, just on failure. 
The semantics are + * slightly different than those obtained by doing `| failure(msg)`, + * in that the message produced by this method will always + * replace the message produced, which is not guaranteed + * by that idiom. + * + * For example, parser `p` below will always produce the + * designated failure message, while `q` will not produce + * it if `sign` is parsed but `number` is not. + * + * {{{ + * def p = sign.? ~ number withFailureMessage "Number expected!" + * def q = sign.? ~ number | failure("Number expected!") + * }}} + * + * @param msg The message that will replace the default failure message. + * @return A parser with the same properties and different failure message. + */ + def withFailureMessage(msg: String) = Parser{ in => + this(in) match { + case Failure(_, next) => Failure(msg, next) + case other => other + } + } + + /** Changes the error message produced by a parser. + * + * This doesn't change the behavior of a parser on neither + * success nor failure, just on error. The semantics are + * slightly different than those obtained by doing `| error(msg)`, + * in that the message produced by this method will always + * replace the message produced, which is not guaranteed + * by that idiom. + * + * For example, parser `p` below will always produce the + * designated error message, while `q` will not produce + * it if `sign` is parsed but `number` is not. + * + * {{{ + * def p = sign.? ~ number withErrorMessage "Number expected!" + * def q = sign.? ~ number | error("Number expected!") + * }}} + * + * @param msg The message that will replace the default error message. + * @return A parser with the same properties and different error message. 
+ */ + def withErrorMessage(msg: String) = Parser{ in => + this(in) match { + case Error(_, next) => Error(msg, next) + case other => other + } + } + } + + /** Wrap a parser so that its failures become errors (the `|` combinator + * will give up as soon as it encounters an error, on failure it simply + * tries the next alternative). + */ + def commit[T](p: => Parser[T]) = Parser{ in => + p(in) match{ + case s @ Success(_, _) => s + case e @ Error(_, _) => e + case f @ Failure(msg, next) => Error(msg, next) + } + } + + /** A parser matching input elements that satisfy a given predicate. + * + * `elem(kind, p)` succeeds if the input starts with an element `e` for which `p(e)` is true. + * + * @param kind The element kind, used for error messages + * @param p A predicate that determines which elements match. + * @return + */ + def elem(kind: String, p: Elem => Boolean) = acceptIf(p)(inEl => kind+" expected") + + /** A parser that matches only the given element `e`. + * + * `elem(e)` succeeds if the input starts with an element `e`. + * + * @param e the `Elem` that must be the next piece of input for the returned parser to succeed + * @return a `Parser` that succeeds if `e` is the next available input (and returns it). + */ + def elem(e: Elem): Parser[Elem] = accept(e) + + /** A parser that matches only the given element `e`. + * + * The method is implicit so that elements can automatically be lifted to their parsers. + * For example, when parsing `Token`s, `Identifier("new")` (which is a `Token`) can be used directly, + * instead of first creating a `Parser` using `accept(Identifier("new"))`. + * + * @param e the `Elem` that must be the next piece of input for the returned parser to succeed + * @return a `tParser` that succeeds if `e` is the next available input. + */ + + implicit def accept(e: Elem): Parser[Elem] = acceptIf(_ == e)("`"+e+"' expected but " + _ + " found") + + /** A parser that matches only the given list of element `es`. 
+ * + * `accept(es)` succeeds if the input subsequently provides the elements in the list `es`. + * + * @param es the list of expected elements + * @return a Parser that recognizes a specified list of elements + */ + def accept[ES <% List[Elem]](es: ES): Parser[List[Elem]] = acceptSeq(es) + + /** The parser that matches an element in the domain of the partial function `f`. + * + * If `f` is defined on the first element in the input, `f` is applied + * to it to produce this parser's result. + * + * Example: The parser `accept("name", {case Identifier(n) => Name(n)})` + * accepts an `Identifier(n)` and returns a `Name(n)` + * + * @param expected a description of the kind of element this parser expects (for error messages) + * @param f a partial function that determines when this parser is successful and what its output is + * @return A parser that succeeds if `f` is applicable to the first element of the input, + * applying `f` to it to produce the result. + */ + def accept[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = acceptMatch(expected, f) + + /** A parser matching input elements that satisfy a given predicate. + * + * `acceptIf(p)(el => "Unexpected "+el)` succeeds if the input starts with an element `e` for which `p(e)` is true. + * + * @param err A function from the received element into an error message. + * @param p A predicate that determines which elements match. + * @return A parser for elements satisfying p(e). + */ + def acceptIf(p: Elem => Boolean)(err: Elem => String): Parser[Elem] = Parser { in => + if (in.atEnd) Failure("end of input", in) + else if (p(in.first)) Success(in.first, in.rest) + else Failure(err(in.first), in) + } + + /** The parser that matches an element in the domain of the partial function `f`. + * + * If `f` is defined on the first element in the input, `f` is applied + * to it to produce this parser's result. 
+ * + * Example: The parser `acceptMatch("name", {case Identifier(n) => Name(n)})` + * accepts an `Identifier(n)` and returns a `Name(n)` + * + * @param expected a description of the kind of element this parser expects (for error messages) + * @param f a partial function that determines when this parser is successful and what its output is + * @return A parser that succeeds if `f` is applicable to the first element of the input, + * applying `f` to it to produce the result. + */ + def acceptMatch[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = Parser{ in => + if (in.atEnd) Failure("end of input", in) + else if (f.isDefinedAt(in.first)) Success(f(in.first), in.rest) + else Failure(expected+" expected", in) + } + + /** A parser that matches only the given [[scala.collection.Iterable]] collection of elements `es`. + * + * `acceptSeq(es)` succeeds if the input subsequently provides the elements in the iterable `es`. + * + * @param es the list of expected elements + * @return a Parser that recognizes a specified list of elements + */ + def acceptSeq[ES <% Iterable[Elem]](es: ES): Parser[List[Elem]] = + es.foldRight[Parser[List[Elem]]](success(Nil)){(x, pxs) => accept(x) ~ pxs ^^ mkList} + + /** A parser that always fails. + * + * @param msg The error message describing the failure. + * @return A parser that always fails with the specified error message. + */ + def failure(msg: String) = Parser{ in => Failure(msg, in) } + + /** A parser that results in an error. + * + * @param msg The error message describing the failure. + * @return A parser that always fails with the specified error message. + */ + def err(msg: String) = Parser{ in => Error(msg, in) } + + /** A parser that always succeeds. 
+ * + * @param v The result for the parser + * @return A parser that always succeeds, with the given result `v` + */ + def success[T](v: T) = Parser{ in => Success(v, in) } + + /** A helper method that turns a `Parser` into one that will + * print debugging information to stdout before and after + * being applied. + */ + def log[T](p: => Parser[T])(name: String): Parser[T] = Parser{ in => + println("trying "+ name +" at "+ in) + val r = p(in) + println(name +" --> "+ r) + r + } + + /** A parser generator for repetitions. + * + * `rep(p)` repeatedly uses `p` to parse the input until `p` fails + * (the result is a List of the consecutive results of `p`). + * + * @param p a `Parser` that is to be applied successively to the input + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input. + */ + def rep[T](p: => Parser[T]): Parser[List[T]] = rep1(p) | success(List()) + + /** A parser generator for interleaved repetitions. + * + * `repsep(p, q)` repeatedly uses `p` interleaved with `q` to parse the input, until `p` fails. + * (The result is a `List` of the results of `p`.) + * + * Example: `repsep(term, ",")` parses a comma-separated list of term's, yielding a list of these terms. + * + * @param p a `Parser` that is to be applied successively to the input + * @param q a `Parser` that parses the elements that separate the elements parsed by `p` + * @return A parser that returns a list of results produced by repeatedly applying `p` (interleaved with `q`) to the input. + * The results of `p` are collected in a list. The results of `q` are discarded. + */ + def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = + rep1sep(p, q) | success(List()) + + /** A parser generator for non-empty repetitions. 
+ * + * `rep1(p)` repeatedly uses `p` to parse the input until `p` fails -- `p` must succeed at least + * once (the result is a `List` of the consecutive results of `p`) + * + * @param p a `Parser` that is to be applied successively to the input + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input + * (and that only succeeds if `p` matches at least once). + */ + def rep1[T](p: => Parser[T]): Parser[List[T]] = rep1(p, p) + + /** A parser generator for non-empty repetitions. + * + * `rep1(f, p)` first uses `f` (which must succeed) and then repeatedly + * uses `p` to parse the input until `p` fails + * (the result is a `List` of the consecutive results of `f` and `p`) + * + * @param first a `Parser` that parses the first piece of input + * @param p0 a `Parser` that is to be applied successively to the rest of the input (if any) -- evaluated at most once, and only when necessary + * @return A parser that returns a list of results produced by first applying `f` and then + * repeatedly `p` to the input (it only succeeds if `f` matches). + */ + @migration("The `p0` call-by-name arguments is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def rep1[T](first: => Parser[T], p0: => Parser[T]): Parser[List[T]] = Parser { in => + lazy val p = p0 // lazy argument + val elems = new ListBuffer[T] + + def continue(in: Input): ParseResult[List[T]] = { + val p0 = p // avoid repeatedly re-evaluating by-name parser + @tailrec def applyp(in0: Input): ParseResult[List[T]] = p0(in0) match { + case Success(x, rest) => elems += x ; applyp(rest) + case e @ Error(_, _) => e // still have to propagate error + case _ => Success(elems.toList, in0) + } + + applyp(in) + } + + first(in) match { + case Success(x, rest) => elems += x ; continue(rest) + case ns: NoSuccess => ns + } + } + + /** A parser generator for a specified number of repetitions. 
+ * + * `repN(n, p)` uses `p` exactly `n` time to parse the input + * (the result is a `List` of the `n` consecutive results of `p`). + * + * @param p a `Parser` that is to be applied successively to the input + * @param num the exact number of times `p` must succeed + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input + * (and that only succeeds if `p` matches exactly `n` times). + */ + def repN[T](num: Int, p: => Parser[T]): Parser[List[T]] = + if (num == 0) success(Nil) else Parser { in => + val elems = new ListBuffer[T] + val p0 = p // avoid repeatedly re-evaluating by-name parser + + @tailrec def applyp(in0: Input): ParseResult[List[T]] = + if (elems.length == num) Success(elems.toList, in0) + else p0(in0) match { + case Success(x, rest) => elems += x ; applyp(rest) + case ns: NoSuccess => ns + } + + applyp(in) + } + + /** A parser generator for non-empty repetitions. + * + * `rep1sep(p, q)` repeatedly applies `p` interleaved with `q` to parse the + * input, until `p` fails. The parser `p` must succeed at least once. + * + * @param p a `Parser` that is to be applied successively to the input + * @param q a `Parser` that parses the elements that separate the elements parsed by `p` + * (interleaved with `q`) + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input + * (and that only succeeds if `p` matches at least once). + * The results of `p` are collected in a list. The results of `q` are discarded. + */ + def rep1sep[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]] = + p ~ rep(q ~> p) ^^ {case x~y => x::y} + + /** A parser generator that, roughly, generalises the rep1sep generator so + * that `q`, which parses the separator, produces a left-associative + * function that combines the elements it separates. + * + * ''From: J. Fokker. Functional parsers. In J. Jeuring and E. 
Meijer, editors, Advanced Functional Programming, + * volume 925 of Lecture Notes in Computer Science, pages 1--23. Springer, 1995.'' + * + * @param p a parser that parses the elements + * @param q a parser that parses the token(s) separating the elements, yielding a left-associative function that + * combines two elements into one + */ + def chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T] + = chainl1(p, p, q) + + /** A parser generator that, roughly, generalises the `rep1sep` generator + * so that `q`, which parses the separator, produces a left-associative + * function that combines the elements it separates. + * + * @param first a parser that parses the first element + * @param p a parser that parses the subsequent elements + * @param q a parser that parses the token(s) separating the elements, + * yielding a left-associative function that combines two elements + * into one + */ + def chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T] + = first ~ rep(q ~ p) ^^ { + case x ~ xs => xs.foldLeft(x: T){case (a, f ~ b) => f(a, b)} // x's type annotation is needed to deal with changed type inference due to SI-5189 + } + + /** A parser generator that generalises the `rep1sep` generator so that `q`, + * which parses the separator, produces a right-associative function that + * combines the elements it separates. Additionally, the right-most (last) + * element and the left-most combining function have to be supplied. 
+ * + * rep1sep(p: Parser[T], q) corresponds to chainr1(p, q ^^ cons, cons, Nil) (where val cons = (x: T, y: List[T]) => x :: y) + * + * @param p a parser that parses the elements + * @param q a parser that parses the token(s) separating the elements, yielding a right-associative function that + * combines two elements into one + * @param combine the "last" (left-most) combination function to be applied + * @param first the "first" (right-most) element to be combined + */ + def chainr1[T, U](p: => Parser[T], q: => Parser[(T, U) => U], combine: (T, U) => U, first: U): Parser[U] + = p ~ rep(q ~ p) ^^ { + case x ~ xs => (new ~(combine, x) :: xs).foldRight(first){case (f ~ a, b) => f(a, b)} + } + + /** A parser generator for optional sub-phrases. + * + * `opt(p)` is a parser that returns `Some(x)` if `p` returns `x` and `None` if `p` fails. + * + * @param p A `Parser` that is tried on the input + * @return a `Parser` that always succeeds: either with the result provided by `p` or + * with the empty result + */ + def opt[T](p: => Parser[T]): Parser[Option[T]] = + p ^^ (x => Some(x)) | success(None) + + /** Wrap a parser so that its failures and errors become success and + * vice versa -- it never consumes any input. + */ + def not[T](p: => Parser[T]): Parser[Unit] = Parser { in => + p(in) match { + case Success(_, _) => Failure("Expected failure", in) + case _ => Success((), in) + } + } + + /** A parser generator for guard expressions. The resulting parser will + * fail or succeed just like the one given as parameter but it will not + * consume any input. + * + * @param p a `Parser` that is to be applied to the input + * @return A parser that returns success if and only if `p` succeeds but + * never consumes any input + */ + def guard[T](p: => Parser[T]): Parser[T] = Parser { in => + p(in) match{ + case s@ Success(s1,_) => Success(s1, in) + case e => e + } + } + + /** `positioned` decorates a parser's result with the start position of the + * input it consumed. 
+ * + * @param p a `Parser` whose result conforms to `Positional`. + * @return A parser that has the same behaviour as `p`, but which marks its + * result with the start position of the input it consumed, + * if it didn't already have a position. + */ + def positioned[T <: Positional](p: => Parser[T]): Parser[T] = Parser { in => + p(in) match { + case Success(t, in1) => Success(if (t.pos == NoPosition) t setPos in.pos else t, in1) + case ns: NoSuccess => ns + } + } + + /** A parser generator delimiting whole phrases (i.e. programs). + * + * `phrase(p)` succeeds if `p` succeeds and no input is left over after `p`. + * + * @param p the parser that must consume all input for the resulting parser + * to succeed. + * @return a parser that has the same result as `p`, but that only succeeds + * if `p` consumed all the input. + */ + def phrase[T](p: Parser[T]) = new Parser[T] { + def apply(in: Input) = lastNoSuccessVar.withValue(None) { + p(in) match { + case s @ Success(out, in1) => + if (in1.atEnd) + s + else + lastNoSuccessVar.value filterNot { _.next.pos < in1.pos } getOrElse Failure("end of input expected", in1) + case ns => lastNoSuccessVar.value.getOrElse(ns) + } + } + } + + /** Given a concatenation with a repetition (list), move the concatenated element into the list */ + def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } + + /** A wrapper over sequence of matches. + * + * Given `p1: Parser[A]` and `p2: Parser[B]`, a parser composed with + * `p1 ~ p2` will have type `Parser[~[A, B]]`. The successful result + * of the parser can be extracted from this case class. + * + * It also enables pattern matching, so something like this is possible: + * + * {{{ + * def concat(p1: Parser[String], p2: Parser[String]): Parser[String] = + * p1 ~ p2 ^^ { case a ~ b => a + b } + * }}} + */ + case class ~[+a, +b](_1: a, _2: b) { + override def toString = "("+ _1 +"~"+ _2 +")" + } + + /** A parser whose `~` combinator disallows back-tracking. 
+ */ + trait OnceParser[+T] extends Parser[T] { + override def ~ [U](p: => Parser[U]): Parser[~[T, U]] + = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~") } + } +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala new file mode 100644 index 0000000000..8ebbc573ad --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala @@ -0,0 +1,166 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.combinator + +import java.util.regex.Pattern +import scala.util.matching.Regex +import scala.util.parsing.input._ +import scala.collection.immutable.PagedSeq +import scala.language.implicitConversions + +/** The ''most important'' differences between `RegexParsers` and + * [[scala.util.parsing.combinator.Parsers]] are: + * + * - `Elem` is defined to be [[scala.Char]] + * - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`, + * so that string literals can be used as parser combinators. + * - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`, + * so that regex expressions can be used as parser combinators. + * - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true, + * skip any whitespace before each parser is called. + * - Protected val `whiteSpace` returns a regex that identifies whitespace. 
+ * + * For example, this creates a very simple calculator receiving `String` input: + * + * {{{ + * object Calculator extends RegexParsers { + * def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble } + * def factor: Parser[Double] = number | "(" ~> expr <~ ")" + * def term : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ { + * case number ~ list => (number /: list) { + * case (x, "*" ~ y) => x * y + * case (x, "/" ~ y) => x / y + * } + * } + * def expr : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ { + * case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /: + * case (x, "+" ~ y) => x + y + * case (x, "-" ~ y) => x - y + * } + * } + * + * def apply(input: String): Double = parseAll(expr, input) match { + * case Success(result, _) => result + * case failure : NoSuccess => scala.sys.error(failure.msg) + * } + * } + * }}} + */ +trait RegexParsers extends Parsers { + + type Elem = Char + + protected val whiteSpace = """\s+""".r + + def skipWhitespace = whiteSpace.toString.length > 0 + + /** Method called to handle whitespace before parsers. + * + * It checks `skipWhitespace` and, if true, skips anything + * matching `whiteSpace` starting from the current offset. + * + * @param source The input being parsed. + * @param offset The offset into `source` from which to match. + * @return The offset to be used for the next parser. 
+ */ + protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = + if (skipWhitespace) + (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match { + case Some(matched) => offset + matched.end + case None => offset + } + else + offset + + /** A parser that matches a literal string */ + implicit def literal(s: String): Parser[String] = new Parser[String] { + def apply(in: Input) = { + val source = in.source + val offset = in.offset + val start = handleWhiteSpace(source, offset) + var i = 0 + var j = start + while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) { + i += 1 + j += 1 + } + if (i == s.length) + Success(source.subSequence(start, j).toString, in.drop(j - offset)) + else { + val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" + Failure("`"+s+"' expected but "+found+" found", in.drop(start - offset)) + } + } + } + + /** A parser that matches a regex string */ + implicit def regex(r: Regex): Parser[String] = new Parser[String] { + def apply(in: Input) = { + val source = in.source + val offset = in.offset + val start = handleWhiteSpace(source, offset) + (r findPrefixMatchOf (source.subSequence(start, source.length))) match { + case Some(matched) => + Success(source.subSequence(start, start + matched.end).toString, + in.drop(start + matched.end - offset)) + case None => + val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" + Failure("string matching regex `"+r+"' expected but "+found+" found", in.drop(start - offset)) + } + } + } + + /** `positioned` decorates a parser's result with the start position of the input it consumed. + * If whitespace is being skipped, then it is skipped before the start position is recorded. + * + * @param p a `Parser` whose result conforms to `Positional`. 
+ * @return A parser that has the same behaviour as `p`, but which marks its result with the + * start position of the input it consumed after whitespace has been skipped, if it + * didn't already have a position. + */ + override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = { + val pp = super.positioned(p) + new Parser[T] { + def apply(in: Input) = { + val offset = in.offset + val start = handleWhiteSpace(in.source, offset) + pp(in.drop (start - offset)) + } + } + } + + override def phrase[T](p: Parser[T]): Parser[T] = + super.phrase(p <~ opt("""\z""".r)) + + /** Parse some prefix of reader `in` with parser `p`. */ + def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = + p(in) + + /** Parse some prefix of character sequence `in` with parser `p`. */ + def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = + p(new CharSequenceReader(in)) + + /** Parse some prefix of reader `in` with parser `p`. */ + def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = + p(new PagedSeqReader(PagedSeq.fromReader(in))) + + /** Parse all of reader `in` with parser `p`. */ + def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = + parse(phrase(p), in) + + /** Parse all of reader `in` with parser `p`. */ + def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = + parse(phrase(p), in) + + /** Parse all of character sequence `in` with parser `p`. 
*/ + def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = + parse(phrase(p), in) +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala new file mode 100644 index 0000000000..d8029d068f --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala @@ -0,0 +1,40 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing +package combinator +package lexical + +import token._ +import input.CharArrayReader.EofCh + +/** This component complements the `Scanners` component with + * common operations for lexical parsers. + * + * Refer to [[scala.util.parsing.combinator.lexical.StdLexical]] + * for a concrete implementation for a simple, Scala-like language. 
+ * + * @author Martin Odersky, Adriaan Moors + */ +abstract class Lexical extends Scanners with Tokens { + + /** A character-parser that matches a letter (and returns it).*/ + def letter = elem("letter", _.isLetter) + + /** A character-parser that matches a digit (and returns it).*/ + def digit = elem("digit", _.isDigit) + + /** A character-parser that matches any character except the ones given in `cs` (and returns it).*/ + def chrExcept(cs: Char*) = elem("", ch => (cs forall (ch != _))) + + /** A character-parser that matches a white-space character (and returns it).*/ + def whitespaceChar = elem("space char", ch => ch <= ' ' && ch != EofCh) +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala new file mode 100644 index 0000000000..2e12915bb8 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala @@ -0,0 +1,63 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing +package combinator +package lexical + +import input._ + +/** This component provides core functionality for lexical parsers. + * + * See its subclasses [[scala.util.parsing.combinator.lexical.Lexical]] and -- most interestingly + * [[scala.util.parsing.combinator.lexical.StdLexical]], for more functionality. + * + * @author Martin Odersky, Adriaan Moors + */ +trait Scanners extends Parsers { + type Elem = Char + type Token + + /** This token is produced by a scanner `Scanner` when scanning failed. */ + def errorToken(msg: String): Token + + /** A parser that produces a token (from a stream of characters). */ + def token: Parser[Token] + + /** A parser for white-space -- its result will be discarded. 
*/ + def whitespace: Parser[Any] + + /** `Scanner` is essentially¹ a parser that produces `Token`s + * from a stream of characters. The tokens it produces are typically + * passed to parsers in `TokenParsers`. + * + * @note ¹ `Scanner` is really a `Reader` of `Token`s + */ + class Scanner(in: Reader[Char]) extends Reader[Token] { + /** Convenience constructor (makes a character reader out of the given string) */ + def this(in: String) = this(new CharArrayReader(in.toCharArray())) + private val (tok, rest1, rest2) = whitespace(in) match { + case Success(_, in1) => + token(in1) match { + case Success(tok, in2) => (tok, in1, in2) + case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) + } + case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) + } + private def skip(in: Reader[Char]) = if (in.atEnd) in else in.rest + + override def source: java.lang.CharSequence = in.source + override def offset: Int = in.offset + def first = tok + def rest = new Scanner(rest2) + def pos = rest1.pos + def atEnd = in.atEnd || (whitespace(in) match { case Success(_, in1) => in1.atEnd case _ => false }) + } +} + diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala new file mode 100644 index 0000000000..32d7502cda --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala @@ -0,0 +1,87 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing +package combinator +package lexical + +import token._ +import input.CharArrayReader.EofCh +import scala.collection.mutable + +/** This component provides a standard lexical parser for a simple, + * [[http://scala-lang.org Scala]]-like language. 
It parses keywords and + * identifiers, numeric literals (integers), strings, and delimiters. + * + * To distinguish between identifiers and keywords, it uses a set of + * reserved identifiers: every string contained in `reserved` is returned + * as a keyword token. (Note that `=>` is hard-coded as a keyword.) + * Additionally, the kinds of delimiters can be specified by the + * `delimiters` set. + * + * Usually this component is used to break character-based input into + * bigger tokens, which are then passed to a token-parser (see + * [[scala.util.parsing.combinator.syntactical.TokenParsers]].) + * + * @author Martin Odersky + * @author Iulian Dragos + * @author Adriaan Moors + */ +class StdLexical extends Lexical with StdTokens { + // see `token` in `Scanners` + def token: Parser[Token] = + ( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") } + | digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") } + | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") } + | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") } + | EofCh ^^^ EOF + | '\'' ~> failure("unclosed string literal") + | '\"' ~> failure("unclosed string literal") + | delim + | failure("illegal character") + ) + + /** Returns the legal identifier chars, except digits. */ + def identChar = letter | elem('_') + + // see `whitespace in `Scanners` + def whitespace: Parser[Any] = rep[Any]( + whitespaceChar + | '/' ~ '*' ~ comment + | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') ) + | '/' ~ '*' ~ failure("unclosed comment") + ) + + protected def comment: Parser[Any] = ( + '*' ~ '/' ^^ { case _ => ' ' } + | chrExcept(EofCh) ~ comment + ) + + /** The set of reserved identifiers: these will be returned as `Keyword`s. 
*/ + val reserved = new mutable.HashSet[String] + + /** The set of delimiters (ordering does not matter). */ + val delimiters = new mutable.HashSet[String] + + protected def processIdent(name: String) = + if (reserved contains name) Keyword(name) else Identifier(name) + + private lazy val _delim: Parser[Token] = { + // construct parser for delimiters by |'ing together the parsers for the individual delimiters, + // starting with the longest one -- otherwise a delimiter D will never be matched if there is + // another delimiter that is a prefix of D + def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { x => Keyword(s) } + + val d = new Array[String](delimiters.size) + delimiters.copyToArray(d, 0) + scala.util.Sorting.quickSort(d) + (d.toList map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x) + } + protected def delim: Parser[Token] = _delim +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala new file mode 100644 index 0000000000..5b9d14c9a7 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala @@ -0,0 +1,32 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing +package combinator +package syntactical + +import token._ +import lexical.StdLexical +import scala.language.implicitConversions + +/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. 
+* +* @author Martin Odersky, Adriaan Moors + */ +class StandardTokenParsers extends StdTokenParsers { + type Tokens = StdTokens + val lexical = new StdLexical + + //an implicit keyword function that gives a warning when a given word is not in the reserved/delimiters list + override implicit def keyword(chars : String): Parser[String] = + if(lexical.reserved.contains(chars) || lexical.delimiters.contains(chars)) super.keyword(chars) + else failure("You are trying to parse \""+chars+"\", but it is neither contained in the delimiters list, nor in the reserved keyword list of your lexical object") + +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala new file mode 100644 index 0000000000..adcf85da7a --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala @@ -0,0 +1,52 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing +package combinator +package syntactical + +import token._ +import scala.collection.mutable +import scala.language.implicitConversions + +/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. +* +* @author Martin Odersky, Adriaan Moors + */ +trait StdTokenParsers extends TokenParsers { + type Tokens <: StdTokens + import lexical.{Keyword, NumericLit, StringLit, Identifier} + + protected val keywordCache = mutable.HashMap[String, Parser[String]]() + + /** A parser which matches a single keyword token. + * + * @param chars The character string making up the matched keyword. 
+ * @return a `Parser` that matches the given string + */ +// implicit def keyword(chars: String): Parser[String] = accept(Keyword(chars)) ^^ (_.chars) + implicit def keyword(chars: String): Parser[String] = + keywordCache.getOrElseUpdate(chars, accept(Keyword(chars)) ^^ (_.chars)) + + /** A parser which matches a numeric literal */ + def numericLit: Parser[String] = + elem("number", _.isInstanceOf[NumericLit]) ^^ (_.chars) + + /** A parser which matches a string literal */ + def stringLit: Parser[String] = + elem("string literal", _.isInstanceOf[StringLit]) ^^ (_.chars) + + /** A parser which matches an identifier */ + def ident: Parser[String] = + elem("identifier", _.isInstanceOf[Identifier]) ^^ (_.chars) +} + + diff --git a/src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala new file mode 100644 index 0000000000..b06babcd7e --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing +package combinator +package syntactical + +/** This is the core component for token-based parsers. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait TokenParsers extends Parsers { + /** `Tokens` is the abstract type of the `Token`s consumed by the parsers in this component. */ + type Tokens <: token.Tokens + + /** `lexical` is the component responsible for consuming some basic kind of + * input (usually character-based) and turning it into the tokens + * understood by these parsers. 
+ */ + val lexical: Tokens + + /** The input-type for these parsers*/ + type Elem = lexical.Token + +} + + diff --git a/src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala b/src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala new file mode 100644 index 0000000000..a102d1541e --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala @@ -0,0 +1,39 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing +package combinator +package token + +/** This component provides the standard `Token`s for a simple, Scala-like language. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait StdTokens extends Tokens { + /** The class of keyword tokens */ + case class Keyword(chars: String) extends Token { + override def toString = "`"+chars+"'" + } + + /** The class of numeric literal tokens */ + case class NumericLit(chars: String) extends Token { + override def toString = chars + } + + /** The class of string literal tokens */ + case class StringLit(chars: String) extends Token { + override def toString = "\""+chars+"\"" + } + + /** The class of identifier tokens */ + case class Identifier(chars: String) extends Token { + override def toString = "identifier "+chars + } +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala b/src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala new file mode 100644 index 0000000000..5c3f1f95b5 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala @@ -0,0 +1,43 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package 
util.parsing +package combinator +package token + +/** This component provides the notion of `Token`, the unit of information that is passed from lexical + * parsers in the `Lexical` component to the parsers in the `TokenParsers` component. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait Tokens { + /** Objects of this type are produced by a lexical parser or ``scanner'', and consumed by a parser. + * + * @see [[scala.util.parsing.combinator.syntactical.TokenParsers]] + */ + abstract class Token { + def chars: String + } + + /** A class of error tokens. Error tokens are used to communicate + * errors detected during lexical analysis + */ + case class ErrorToken(msg: String) extends Token { + def chars = "*** error: "+msg + } + + /** A class for end-of-file tokens */ + case object EOF extends Token { + def chars = "" + } + + /** This token is produced by a scanner `Scanner` when scanning failed. */ + def errorToken(msg: String): Token = new ErrorToken(msg) +} diff --git a/src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala b/src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala new file mode 100644 index 0000000000..22530cb9aa --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.input + +/** An object encapsulating basic character constants. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +object CharArrayReader { + final val EofCh = '\032' +} + +/** A character array reader reads a stream of characters (keeping track of their positions) + * from an array. 
+ * + * @param chars an array of characters + * @param index starting offset into the array; the first element returned will be `source(index)` + * + * @author Martin Odersky + * @author Adriaan Moors + */ +class CharArrayReader(chars: Array[Char], index: Int) extends CharSequenceReader(chars, index) { + + def this(chars: Array[Char]) = this(chars, 0) + +} diff --git a/src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala b/src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala new file mode 100644 index 0000000000..8e7751cc82 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala @@ -0,0 +1,66 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.input + +/** An object encapsulating basic character constants. + * + * @author Martin Odersky, Adriaan Moors + */ +object CharSequenceReader { + final val EofCh = '\032' +} + +/** A character array reader reads a stream of characters (keeping track of their positions) + * from an array. + * + * @param source the source sequence + * @param offset starting offset. + * + * @author Martin Odersky + */ +class CharSequenceReader(override val source: java.lang.CharSequence, + override val offset: Int) extends Reader[Char] { + import CharSequenceReader._ + + /** Construct a `CharSequenceReader` with its first element at + * `source(0)` and position `(1,1)`. + */ + def this(source: java.lang.CharSequence) = this(source, 0) + + /** Returns the first element of the reader, or EofCh if reader is at its end. + */ + def first = + if (offset < source.length) source.charAt(offset) else EofCh + + /** Returns a CharSequenceReader consisting of all elements except the first. 
+ * + * @return If `atEnd` is `true`, the result will be `this`; + * otherwise, it's a `CharSequenceReader` containing the rest of input. + */ + def rest: CharSequenceReader = + if (offset < source.length) new CharSequenceReader(source, offset + 1) + else this + + /** The position of the first element in the reader. + */ + def pos: Position = new OffsetPosition(source, offset) + + /** true iff there are no more elements in this reader (except for trailing + * EofCh's) + */ + def atEnd = offset >= source.length + + /** Returns an abstract reader consisting of all elements except the first + * `n` elements. + */ + override def drop(n: Int): CharSequenceReader = + new CharSequenceReader(source, offset + n) +} diff --git a/src/parser-combinators/scala/util/parsing/input/NoPosition.scala b/src/parser-combinators/scala/util/parsing/input/NoPosition.scala new file mode 100644 index 0000000000..4a32264b79 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/NoPosition.scala @@ -0,0 +1,25 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.input + +/** Undefined position. 
+ * + * @author Martin Odersky + * @author Adriaan Moors + */ +object NoPosition extends Position { + def line = 0 + def column = 0 + override def toString = "" + override def longString = toString + def lineContents = "" +} diff --git a/src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala b/src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala new file mode 100644 index 0000000000..23f79c74d1 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala @@ -0,0 +1,73 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +import scala.collection.mutable.ArrayBuffer + +/** `OffsetPosition` is a standard class for positions + * represented as offsets into a source ``document''. + * + * @param source The source document + * @param offset The offset indicating the position + * + * @author Martin Odersky + */ +case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends Position { + + /** An index that contains all line starts, including first line, and eof. */ + private lazy val index: Array[Int] = { + val lineStarts = new ArrayBuffer[Int] + lineStarts += 0 + for (i <- 0 until source.length) + if (source.charAt(i) == '\n') lineStarts += (i + 1) + lineStarts += source.length + lineStarts.toArray + } + + /** The line number referred to by the position; line numbers start at 1. */ + def line: Int = { + var lo = 0 + var hi = index.length - 1 + while (lo + 1 < hi) { + val mid = (hi + lo) / 2 + if (offset < index(mid)) hi = mid + else lo = mid + } + lo + 1 + } + + /** The column number referred to by the position; column numbers start at 1. */ + def column: Int = offset - index(line - 1) + 1 + + /** The contents of the line numbered at the current offset. 
+ * + * @return the line at `offset` (not including a newline) + */ + def lineContents: String = + source.subSequence(index(line - 1), index(line)).toString + + /** Returns a string representation of the `Position`, of the form `line.column`. */ + override def toString = line+"."+column + + /** Compare this position to another, by first comparing their line numbers, + * and then -- if necessary -- using the columns to break a tie. + * + * @param that a `Position` to compare to this `Position` + * @return true if this position's line number or (in case of equal line numbers) + * column is smaller than the corresponding components of `that` + */ + override def <(that: Position) = that match { + case OffsetPosition(_, that_offset) => + this.offset < that_offset + case _ => + this.line < that.line || + this.line == that.line && this.column < that.column + } +} diff --git a/src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala b/src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala new file mode 100644 index 0000000000..468f1f9a5f --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala @@ -0,0 +1,71 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.input + +import scala.collection.immutable.PagedSeq + +/** An object encapsulating basic character constants. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +object PagedSeqReader { + final val EofCh = '\032' +} + +/** A character array reader reads a stream of characters (keeping track of their positions) + * from an array. + * + * @param seq the source sequence + * @param offset starting offset. 
+ * + * @author Martin Odersky + */ +class PagedSeqReader(seq: PagedSeq[Char], + override val offset: Int) extends Reader[Char] { + import PagedSeqReader._ + + override lazy val source: java.lang.CharSequence = seq + + /** Construct a `PagedSeqReader` with its first element at + * `source(0)` and position `(1,1)`. + */ + def this(seq: PagedSeq[Char]) = this(seq, 0) + + /** Returns the first element of the reader, or EofCh if reader is at its end + */ + def first = + if (seq.isDefinedAt(offset)) seq(offset) else EofCh + + /** Returns a PagedSeqReader consisting of all elements except the first + * + * @return If `atEnd` is `true`, the result will be `this`; + * otherwise, it's a `PagedSeqReader` containing the rest of input. + */ + def rest: PagedSeqReader = + if (seq.isDefinedAt(offset)) new PagedSeqReader(seq, offset + 1) + else this + + /** The position of the first element in the reader. + */ + def pos: Position = new OffsetPosition(source, offset) + + /** true iff there are no more elements in this reader (except for trailing + * EofCh's). + */ + def atEnd = !seq.isDefinedAt(offset) + + /** Returns an abstract reader consisting of all elements except the first + * `n` elements. + */ + override def drop(n: Int): PagedSeqReader = + new PagedSeqReader(seq, offset + n) +} diff --git a/src/parser-combinators/scala/util/parsing/input/Position.scala b/src/parser-combinators/scala/util/parsing/input/Position.scala new file mode 100644 index 0000000000..b7995a6471 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/Position.scala @@ -0,0 +1,62 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +/** `Position` is the base trait for objects describing a position in a `document`. 
+ * + * It provides functionality for: + * - generating a visual representation of this position (`longString`); + * - comparing two positions (`<`). + * + * To use this class for a concrete kind of `document`, implement the `lineContents` method. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait Position { + + /** The line number referred to by the position; line numbers start at 1. */ + def line: Int + + /** The column number referred to by the position; column numbers start at 1. */ + def column: Int + + /** The contents of the line at this position. (must not contain a new-line character). + */ + protected def lineContents: String + + /** Returns a string representation of the `Position`, of the form `line.column`. */ + override def toString = ""+line+"."+column + + /** Returns a more ``visual'' representation of this position. + * More precisely, the resulting string consists of two lines: + * 1. the line in the document referred to by this position + * 2. a caret indicating the column + * + * Example: + * {{{ + * List(this, is, a, line, from, the, document) + * ^ + * }}} + */ + def longString = lineContents+"\n"+lineContents.take(column-1).map{x => if (x == '\t') x else ' ' } + "^" + + /** Compare this position to another, by first comparing their line numbers, + * and then -- if necessary -- using the columns to break a tie. 
+ * + * @param `that` a `Position` to compare to this `Position` + * @return true if this position's line number or (in case of equal line numbers) + * column is smaller than the corresponding components of `that` + */ + def <(that: Position) = { + this.line < that.line || + this.line == that.line && this.column < that.column + } +} diff --git a/src/parser-combinators/scala/util/parsing/input/Positional.scala b/src/parser-combinators/scala/util/parsing/input/Positional.scala new file mode 100644 index 0000000000..cfde67cadd --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/Positional.scala @@ -0,0 +1,30 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +/** A trait for objects that have a source position. + * + * @author Martin Odersky, Adriaan Moors + */ +trait Positional { + + /** The source position of this object, initially set to undefined. */ + var pos: Position = NoPosition + + /** If current source position is undefined, update it with given position `newpos` + * @return the object itself + */ + def setPos(newpos: Position): this.type = { + if (pos eq NoPosition) pos = newpos + this + } +} + + diff --git a/src/parser-combinators/scala/util/parsing/input/Reader.scala b/src/parser-combinators/scala/util/parsing/input/Reader.scala new file mode 100644 index 0000000000..9dbf08a7ca --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/Reader.scala @@ -0,0 +1,62 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.input + + +/** An interface for streams of values that have positions. 
+ * + * @author Martin Odersky + * @author Adriaan Moors + */ +abstract class Reader[+T] { + + /** If this is a reader over character sequences, the underlying char sequence. + * If not, throws a `NoSuchMethodError` exception. + * + * @throws [[java.lang.NoSuchMethodError]] if this not a char sequence reader. + */ + def source: java.lang.CharSequence = + throw new NoSuchMethodError("not a char sequence reader") + + def offset: Int = + throw new NoSuchMethodError("not a char sequence reader") + + /** Returns the first element of the reader + */ + def first: T + + /** Returns an abstract reader consisting of all elements except the first + * + * @return If `atEnd` is `true`, the result will be `this'; + * otherwise, it's a `Reader` containing more elements. + */ + def rest: Reader[T] + + /** Returns an abstract reader consisting of all elements except the first `n` elements. + */ + def drop(n: Int): Reader[T] = { + var r: Reader[T] = this + var cnt = n + while (cnt > 0) { + r = r.rest; cnt -= 1 + } + r + } + + /** The position of the first element in the reader. + */ + def pos: Position + + /** `true` iff there are no more elements in this reader. + */ + def atEnd: Boolean +} diff --git a/src/parser-combinators/scala/util/parsing/input/StreamReader.scala b/src/parser-combinators/scala/util/parsing/input/StreamReader.scala new file mode 100644 index 0000000000..30eb097fd7 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/StreamReader.scala @@ -0,0 +1,76 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +import java.io.BufferedReader +import scala.collection.immutable.PagedSeq + +/** An object to create a `StreamReader` from a `java.io.Reader`. 
+ * + * @author Miles Sabin + */ +object StreamReader { + final val EofCh = '\032' + + /** Create a `StreamReader` from a `java.io.Reader`. + * + * @param in the `java.io.Reader` that provides the underlying + * stream of characters for this Reader. + */ + def apply(in: java.io.Reader): StreamReader = { + new StreamReader(PagedSeq.fromReader(in), 0, 1) + } +} + +/** A StreamReader reads from a character sequence, typically created as a PagedSeq + * from a java.io.Reader + * + * NOTE: + * StreamReaders do not really fulfill the new contract for readers, which + * requires a `source` CharSequence representing the full input. + * Instead source is treated line by line. + * As a consequence, regex matching cannot extend beyond a single line + * when a StreamReader are used for input. + * + * If you need to match regexes spanning several lines you should consider + * class `PagedSeqReader` instead. + * + * @author Miles Sabin + * @author Martin Odersky + */ +sealed class StreamReader(seq: PagedSeq[Char], off: Int, lnum: Int) extends PagedSeqReader(seq, off) { + import StreamReader._ + + override def rest: StreamReader = + if (off == seq.length) this + else if (seq(off) == '\n') + new StreamReader(seq.slice(off + 1), 0, lnum + 1) + else new StreamReader(seq, off + 1, lnum) + + private def nextEol = { + var i = off + while (i < seq.length && seq(i) != '\n' && seq(i) != EofCh) i += 1 + i + } + + override def drop(n: Int): StreamReader = { + val eolPos = nextEol + if (eolPos < off + n && eolPos < seq.length) + new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1).drop(off + n - (eolPos + 1)) + else + new StreamReader(seq, off + n, lnum) + } + + override def pos: Position = new Position { + def line = lnum + def column = off + 1 + def lineContents = seq.slice(0, nextEol).toString + } +} diff --git a/src/parser-combinators/scala/util/parsing/json/JSON.scala b/src/parser-combinators/scala/util/parsing/json/JSON.scala new file mode 100644 index 0000000000..b06dddf532 --- 
/dev/null +++ b/src/parser-combinators/scala/util/parsing/json/JSON.scala @@ -0,0 +1,97 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.json + +/** + * This object provides a simple interface to the JSON parser class. + * The default conversion for numerics is into a double. If you wish to + * override this behavior at the global level, you can set the + * `globalNumberParser` property to your own `(String => Any)` function. + * If you only want to override at the per-thread level then you can set + * the `perThreadNumberParser` property to your function. For example: + * {{{ + * val myConversionFunc = {input : String => BigDecimal(input)} + * + * // Global override + * JSON.globalNumberParser = myConversionFunc + * + * // Per-thread override + * JSON.perThreadNumberParser = myConversionFunc + * }}} + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This object will be removed.", "2.11.0") +object JSON extends Parser { + + /** + * This method converts ''raw'' results back into the original, deprecated + * form. + */ + private def unRaw (in : Any) : Any = in match { + case JSONObject(obj) => obj.map({ case (k,v) => (k,unRaw(v))}).toList + case JSONArray(list) => list.map(unRaw) + case x => x + } + + /** + * Parse the given `JSON` string and return a list of elements. If the + * string is a `JSON` object it will be a `JSONObject`. If it's a `JSON` + * array it will be a `JSONArray`. + * + * @param input the given `JSON` string. + * @return an optional `JSONType` element. 
+ */ + def parseRaw(input : String) : Option[JSONType] = + phrase(root)(new lexical.Scanner(input)) match { + case Success(result, _) => Some(result) + case _ => None + } + + /** + * Parse the given `JSON` string and return either a `List[Any]` + * if the `JSON` string specifies an `Array`, or a + * `Map[String,Any]` if the `JSON` string specifies an object. + * + * @param input the given `JSON` string. + * @return an optional list or map. + */ + def parseFull(input: String): Option[Any] = + parseRaw(input) match { + case Some(data) => Some(resolveType(data)) + case None => None + } + + /** + * A utility method to resolve a parsed `JSON` list into objects or + * arrays. See the `parse` method for details. + */ + def resolveType(input: Any): Any = input match { + case JSONObject(data) => data.transform { + case (k,v) => resolveType(v) + } + case JSONArray(data) => data.map(resolveType) + case x => x + } + + /** + * The global (VM) default function for converting a string to a numeric value. + */ + def globalNumberParser_=(f: NumericParser) { defaultNumberParser = f } + def globalNumberParser : NumericParser = defaultNumberParser + + /** + * Defines the function used to convert a numeric string literal into a + * numeric format on a per-thread basis. Use `globalNumberParser` for a + * global override. 
+ */ + def perThreadNumberParser_=(f : NumericParser) { numberParser.set(f) } + def perThreadNumberParser : NumericParser = numberParser.get() +} diff --git a/src/parser-combinators/scala/util/parsing/json/Lexer.scala b/src/parser-combinators/scala/util/parsing/json/Lexer.scala new file mode 100644 index 0000000000..7fc4e0bab6 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/json/Lexer.scala @@ -0,0 +1,90 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.json + +import scala.util.parsing.combinator._ +import scala.util.parsing.combinator.lexical._ +import scala.util.parsing.input.CharArrayReader.EofCh + +/** + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +class Lexer extends StdLexical with ImplicitConversions { + + override def token: Parser[Token] = + //( '\"' ~ rep(charSeq | letter) ~ '\"' ^^ lift(StringLit) + ( string ^^ StringLit + | number ~ letter ^^ { case n ~ l => ErrorToken("Invalid number format : " + n + l) } + | '-' ~> whitespace ~ number ~ letter ^^ { case ws ~ num ~ l => ErrorToken("Invalid number format : -" + num + l) } + | '-' ~> whitespace ~ number ^^ { case ws ~ num => NumericLit("-" + num) } + | number ^^ NumericLit + | EofCh ^^^ EOF + | delim + | '\"' ~> failure("Unterminated string") + | rep(letter) ^^ checkKeyword + | failure("Illegal character") + ) + + def checkKeyword(xs : List[Any]) = { + val strRep = xs mkString "" + if (reserved contains strRep) Keyword(strRep) else ErrorToken("Not a keyword: " + strRep) + } + + /** A string is a collection of zero or more Unicode characters, wrapped in + * double quotes, using backslash escapes (cf. http://www.json.org/). 
+ */ + def string = '\"' ~> rep(charSeq | chrExcept('\"', '\n', EofCh)) <~ '\"' ^^ { _ mkString "" } + + override def whitespace = rep(whitespaceChar) + + def number = intPart ~ opt(fracPart) ~ opt(expPart) ^^ { case i ~ f ~ e => + i + optString(".", f) + optString("", e) + } + def intPart = zero | intList + def intList = nonzero ~ rep(digit) ^^ {case x ~ y => (x :: y) mkString ""} + def fracPart = '.' ~> rep(digit) ^^ { _ mkString "" } + def expPart = exponent ~ opt(sign) ~ rep1(digit) ^^ { case e ~ s ~ d => + e + optString("", s) + d.mkString("") + } + + private def optString[A](pre: String, a: Option[A]) = a match { + case Some(x) => pre + x.toString + case None => "" + } + + def zero: Parser[String] = '0' ^^^ "0" + def nonzero = elem("nonzero digit", d => d.isDigit && d != '0') + def exponent = elem("exponent character", d => d == 'e' || d == 'E') + def sign = elem("sign character", d => d == '-' || d == '+') + + def charSeq: Parser[String] = + ('\\' ~ '\"' ^^^ "\"" + |'\\' ~ '\\' ^^^ "\\" + |'\\' ~ '/' ^^^ "/" + |'\\' ~ 'b' ^^^ "\b" + |'\\' ~ 'f' ^^^ "\f" + |'\\' ~ 'n' ^^^ "\n" + |'\\' ~ 'r' ^^^ "\r" + |'\\' ~ 't' ^^^ "\t" + |'\\' ~> 'u' ~> unicodeBlock) + + val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray + def hexDigit = elem("hex digit", hexDigits.contains(_)) + + private def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ { + case a ~ b ~ c ~ d => + new String(Array(Integer.parseInt(List(a, b, c, d) mkString "", 16)), 0, 1) + } + + //private def lift[T](f: String => T)(xs: List[Any]): T = f(xs mkString "") +} diff --git a/src/parser-combinators/scala/util/parsing/json/Parser.scala b/src/parser-combinators/scala/util/parsing/json/Parser.scala new file mode 100644 index 0000000000..521dfc6612 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/json/Parser.scala @@ -0,0 +1,147 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | 
http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.json + +import scala.util.parsing.combinator._ +import scala.util.parsing.combinator.syntactical._ + +/** + * A marker class for the JSON result types. + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +sealed abstract class JSONType { + /** + * This version of toString allows you to provide your own value + * formatter. + */ + def toString (formatter : JSONFormat.ValueFormatter) : String + + /** + * Returns a String representation of this JSON value + * using the JSONFormat.defaultFormatter. + */ + override def toString = toString(JSONFormat.defaultFormatter) +} + +/** + * This object defines functions that are used when converting JSONType + * values into String representations. Mostly this is concerned with + * proper quoting of strings. + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This object will be removed.", "2.11.0") +object JSONFormat { + /** + * This type defines a function that can be used to + * format values into JSON format. + */ + type ValueFormatter = Any => String + + /** + * The default formatter used by the library. You can + * provide your own with the toString calls on + * JSONObject and JSONArray instances. + */ + val defaultFormatter : ValueFormatter = (x : Any) => x match { + case s : String => "\"" + quoteString(s) + "\"" + case jo : JSONObject => jo.toString(defaultFormatter) + case ja : JSONArray => ja.toString(defaultFormatter) + case other => other.toString + } + + /** + * This function can be used to properly quote Strings + * for JSON output. 
+ */ + def quoteString (s : String) : String = + s.map { + case '"' => "\\\"" + case '\\' => "\\\\" + case '/' => "\\/" + case '\b' => "\\b" + case '\f' => "\\f" + case '\n' => "\\n" + case '\r' => "\\r" + case '\t' => "\\t" + /* We'll unicode escape any control characters. These include: + * 0x0 -> 0x1f : ASCII Control (C0 Control Codes) + * 0x7f : ASCII DELETE + * 0x80 -> 0x9f : C1 Control Codes + * + * Per RFC4627, section 2.5, we're not technically required to + * encode the C1 codes, but we do to be safe. + */ + case c if ((c >= '\u0000' && c <= '\u001f') || (c >= '\u007f' && c <= '\u009f')) => "\\u%04x".format(c.toInt) + case c => c + }.mkString +} + +/** + * Represents a JSON Object (map). + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +case class JSONObject (obj : Map[String,Any]) extends JSONType { + def toString (formatter : JSONFormat.ValueFormatter) = + "{" + obj.map({ case (k,v) => formatter(k.toString) + " : " + formatter(v) }).mkString(", ") + "}" +} + +/** + * Represents a JSON Array (list). + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +case class JSONArray (list : List[Any]) extends JSONType { + def toString (formatter : JSONFormat.ValueFormatter) = + "[" + list.map(formatter).mkString(", ") + "]" +} + +/** + * The main JSON Parser. 
+ * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +class Parser extends StdTokenParsers with ImplicitConversions { + // Fill in abstract defs + type Tokens = Lexer + val lexical = new Tokens + + // Configure lexical parsing + lexical.reserved ++= List("true", "false", "null") + lexical.delimiters ++= List("{", "}", "[", "]", ":", ",") + + /** Type signature for functions that can parse numeric literals */ + type NumericParser = String => Any + + // Global default number parsing function + protected var defaultNumberParser : NumericParser = {_.toDouble} + + // Per-thread default number parsing function + protected val numberParser = new ThreadLocal[NumericParser]() { + override def initialValue() = defaultNumberParser + } + + // Define the grammar + def root = jsonObj | jsonArray + def jsonObj = "{" ~> repsep(objEntry, ",") <~ "}" ^^ { case vals : List[_] => JSONObject(Map(vals : _*)) } + def jsonArray = "[" ~> repsep(value, ",") <~ "]" ^^ { case vals : List[_] => JSONArray(vals) } + def objEntry = stringVal ~ (":" ~> value) ^^ { case x ~ y => (x, y) } + def value: Parser[Any] = (jsonObj | jsonArray | number | "true" ^^^ true | "false" ^^^ false | "null" ^^^ null | stringVal) + def stringVal = accept("string", { case lexical.StringLit(n) => n} ) + def number = accept("number", { case lexical.NumericLit(n) => numberParser.get.apply(n)} ) +} + diff --git a/src/partest/scala/tools/partest/nest/FileManager.scala b/src/partest/scala/tools/partest/nest/FileManager.scala index ee24c0b9c1..7bfa8c6e77 100644 --- a/src/partest/scala/tools/partest/nest/FileManager.scala +++ b/src/partest/scala/tools/partest/nest/FileManager.scala @@ -68,13 +68,14 @@ trait FileManager extends FileUtil { else (SFile(LATEST_LIB).parent.parent / "classes" / what).toAbsolute.path } + def latestParserCBLib = relativeToLibrary("parser-combinators") def latestXmlLib = relativeToLibrary("xml") def latestScaladoc = 
relativeToLibrary("scaladoc") def latestInteractive = relativeToLibrary("interactive") def latestScalapFile = relativeToLibrary("scalap") def latestPaths = List( LATEST_LIB, LATEST_REFLECT, LATEST_COMP, LATEST_PARTEST, LATEST_ACTORS, - latestXmlLib, latestScalapFile, latestScaladoc, latestInteractive + latestParserCBLib, latestXmlLib, latestScalapFile, latestScaladoc, latestInteractive ) def latestFiles = latestPaths map (p => new java.io.File(p)) def latestUrls = latestFiles map (_.toURI.toURL) diff --git a/test/partest b/test/partest index d72c1026f3..0259cdb791 100755 --- a/test/partest +++ b/test/partest @@ -64,7 +64,7 @@ if [ -z "$EXT_CLASSPATH" ] ; then fi done elif [ -f "$SCALA_HOME/build/pack/lib/scala-partest.jar" ] ; then - for lib in `echo "scala-partest scala-library scala-xml scala-reflect scala-compiler diffutils"`; do + for lib in `echo "scala-partest scala-library scala-parser-combinators scala-xml scala-reflect scala-compiler diffutils"`; do ext="$SCALA_HOME/build/pack/lib/$lib.jar" if [ -z "$EXT_CLASSPATH" ] ; then EXT_CLASSPATH="$ext" -- cgit v1.2.3