diff options
Diffstat (limited to 'src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala')
-rw-r--r-- | src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala | 166 |
1 files changed, 166 insertions, 0 deletions
// Source: src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala
// (new file, mode 100644, blob 8ebbc573ad)

/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2006-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */


package scala
package util.parsing.combinator

import java.util.regex.Pattern
import scala.util.matching.Regex
import scala.util.parsing.input._
import scala.collection.immutable.PagedSeq
import scala.language.implicitConversions

/** The ''most important'' differences between `RegexParsers` and
 *  [[scala.util.parsing.combinator.Parsers]] are:
 *
 *  - `Elem` is defined to be [[scala.Char]]
 *  - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`,
 *    so that string literals can be used as parser combinators.
 *  - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`,
 *    so that regex expressions can be used as parser combinators.
 *  - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true,
 *    skip any whitespace before each parser is called.
 *  - Protected val `whiteSpace` returns a regex that identifies whitespace.
 *
 *  For example, this creates a very simple calculator receiving `String` input:
 *
 *  {{{
 *  object Calculator extends RegexParsers {
 *    def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble }
 *    def factor: Parser[Double] = number | "(" ~> expr <~ ")"
 *    def term  : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ {
 *      case number ~ list => (number /: list) {
 *        case (x, "*" ~ y) => x * y
 *        case (x, "/" ~ y) => x / y
 *      }
 *    }
 *    def expr  : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ {
 *      case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /:
 *        case (x, "+" ~ y) => x + y
 *        case (x, "-" ~ y) => x - y
 *      }
 *    }
 *
 *    def apply(input: String): Double = parseAll(expr, input) match {
 *      case Success(result, _) => result
 *      case failure : NoSuccess => scala.sys.error(failure.msg)
 *    }
 *  }
 *  }}}
 */
trait RegexParsers extends Parsers {

  type Elem = Char

  /** The regex that identifies whitespace to be skipped before each parser. */
  protected val whiteSpace: Regex = """\s+""".r

  /** Whether whitespace is skipped before each parser.
   *
   *  True exactly when the textual form of `whiteSpace` is non-empty.
   */
  def skipWhitespace: Boolean = whiteSpace.toString.length > 0

  /** Matches `r` as a prefix of `source` beginning at index `start`, in place.
   *
   *  The match is performed through a matcher region instead of
   *  `source.subSequence(start, source.length)`: taking the subsequence of a
   *  `String` may copy the whole remaining input, which makes repeated parser
   *  calls quadratic in the input length. With the matcher's default
   *  (anchoring, non-transparent) bounds the region behaves like an
   *  independent input, so the match result is the same as on the subsequence.
   *
   *  @param r      The regex to match.
   *  @param source The input being parsed.
   *  @param start  The index into `source` at which the match must begin.
   *  @return       The absolute index in `source` just past the match, or `None`
   *                if `r` does not match at `start`.
   */
  private def prefixMatchEnd(r: Regex, source: java.lang.CharSequence, start: Int): Option[Int] = {
    val m = r.pattern.matcher(source).region(start, source.length)
    if (m.lookingAt()) Some(m.end()) else None
  }

  /** Method called to handle whitespace before parsers.
   *
   *  It checks `skipWhitespace` and, if true, skips anything
   *  matching `whiteSpace` starting from the current offset.
   *
   *  @param source  The input being parsed.
   *  @param offset  The offset into `source` from which to match.
   *  @return        The offset to be used for the next parser.
   */
  protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
    if (skipWhitespace) prefixMatchEnd(whiteSpace, source, offset).getOrElse(offset)
    else offset

  /** A parser that matches a literal string.
   *
   *  Whitespace is handled first (see `handleWhiteSpace`); the literal must then
   *  appear character for character. On failure the reported input position is
   *  the one after whitespace handling, and the message names the first
   *  character found there (or "end of source").
   */
  implicit def literal(s: String): Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      // Index-based scan: `i` walks the literal, `j` walks the input.
      var i = 0
      var j = start
      while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) {
        i += 1
        j += 1
      }
      if (i == s.length)
        Success(source.subSequence(start, j).toString, in.drop(j - offset))
      else {
        val found = if (start == source.length()) "end of source" else s"`${source.charAt(start)}'"
        Failure(s"`$s' expected but $found found", in.drop(start - offset))
      }
    }
  }

  /** A parser that matches a regex string.
   *
   *  Whitespace is handled first (see `handleWhiteSpace`); the regex must then
   *  match a prefix of the remaining input. The result is the matched text.
   */
  implicit def regex(r: Regex): Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      prefixMatchEnd(r, source, start) match {
        case Some(end) =>
          // `end` is an absolute index into `source`, so only the matched text is copied.
          Success(source.subSequence(start, end).toString, in.drop(end - offset))
        case None =>
          val found = if (start == source.length()) "end of source" else s"`${source.charAt(start)}'"
          Failure(s"string matching regex `$r' expected but $found found", in.drop(start - offset))
      }
    }
  }

  /** `positioned` decorates a parser's result with the start position of the input it consumed.
   *  If whitespace is being skipped, then it is skipped before the start position is recorded.
   *
   *  @param p a `Parser` whose result conforms to `Positional`.
   *  @return A parser that has the same behaviour as `p`, but which marks its result with the
   *          start position of the input it consumed after whitespace has been skipped, if it
   *          didn't already have a position.
   */
  override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = {
    val pp = super.positioned(p)
    new Parser[T] {
      def apply(in: Input): ParseResult[T] = {
        val offset = in.offset
        val start = handleWhiteSpace(in.source, offset)
        pp(in.drop(start - offset))
      }
    }
  }

  /** A parser that succeeds only if `p` consumes the input up to its end.
   *
   *  The optional `\z` regex parser appended to `p` runs the usual whitespace
   *  handling first, so trailing whitespace is consumed (when whitespace
   *  skipping is enabled) before the end-of-input check of `super.phrase`.
   */
  override def phrase[T](p: Parser[T]): Parser[T] =
    super.phrase(p <~ opt("""\z""".r))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    p(in)

  /** Parse some prefix of character sequence `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    p(new CharSequenceReader(in))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    p(new PagedSeqReader(PagedSeq.fromReader(in)))

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of character sequence `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    parse(phrase(p), in)
}