path: root/src/library/scala/util/parsing/combinator/RegexParsers.scala


                                                                          
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2006-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */


package scala.util.parsing.combinator

import java.util.regex.Pattern
import scala.util.matching.Regex
import scala.util.parsing.input._
import scala.collection.immutable.PagedSeq
import scala.language.implicitConversions

/** The ''most important'' differences between `RegexParsers` and
 *  [[scala.util.parsing.combinator.Parsers]] are:
 *
 *  - `Elem` is defined to be [[scala.Char]]
 *  - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`,
 *    so that string literals can be used as parser combinators.
 *  - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`,
 *    so that regex expressions can be used as parser combinators.
 *  - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true,
 *    skip any whitespace before each parser is called.
 *  - Protected val `whiteSpace` returns a regex that identifies whitespace.
 *
 *  For example, this creates a very simple calculator receiving `String` input:
 *
 *  {{{
 *  object Calculator extends RegexParsers {
 *    def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble }
 *    def factor: Parser[Double] = number | "(" ~> expr <~ ")"
 *    def term  : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ {
 *      case number ~ list => (number /: list) {
 *        case (x, "*" ~ y) => x * y
 *        case (x, "/" ~ y) => x / y
 *      }
 *    }
 *    def expr  : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ {
 *      case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /:
 *        case (x, "+" ~ y) => x + y
 *        case (x, "-" ~ y) => x - y
 *      }
 *    }
 *
 *    def apply(input: String): Double = parseAll(expr, input) match {
 *      case Success(result, _) => result
 *      case failure : NoSuccess => scala.sys.error(failure.msg)
 *    }
 *  }
 *  }}}
 */
trait RegexParsers extends Parsers {

  type Elem = Char

  protected val whiteSpace = """\s+""".r

  def skipWhitespace = whiteSpace.toString.length > 0

  /** Method called to handle whitespace before parsers.
   *
   *  It checks `skipWhitespace` and, if true, skips anything
   *  matching `whiteSpace` starting from the current offset.
   *
   *  @param source  The input being parsed.
   *  @param offset  The offset into `source` from which to match.
   *  @return        The offset to be used for the next parser.
   */
  protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
    if (skipWhitespace)
      (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match {
        case Some(matched) => offset + matched.end
        case None => offset
      }
    else
      offset

  /** A parser that matches a literal string */
  implicit def literal(s: String): Parser[String] = new Parser[String] {
    def apply(in: Input) = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      var i = 0
      var j = start
      while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) {
        i += 1
        j += 1
      }
      if (i == s.length)
        Success(source.subSequence(start, j).toString, in.drop(j - offset))
      else  {
        val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'"
        Failure("`"+s+"' expected but "+found+" found", in.drop(start - offset))
      }
    }
  }

  /** A parser that matches a regex string */
  implicit def regex(r: Regex): Parser[String] = new Parser[String] {
    def apply(in: Input) = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      (r findPrefixMatchOf (source.subSequence(start, source.length))) match {
        case Some(matched) =>
          Success(source.subSequence(start, start + matched.end).toString,
                  in.drop(start + matched.end - offset))
        case None =>
          val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'"
          Failure("string matching regex `"+r+"' expected but "+found+" found", in.drop(start - offset))
      }
    }
  }

  /** `positioned` decorates a parser's result with the start position of the input it consumed.
   * If whitespace is being skipped, then it is skipped before the start position is recorded.
   *
   * @param p a `Parser` whose result conforms to `Positional`.
   * @return A parser that has the same behaviour as `p`, but which marks its result with the
   *         start position of the input it consumed after whitespace has been skipped, if it
   *         didn't already have a position.
   */
  override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = {
    val pp = super.positioned(p)
    new Parser[T] {
      def apply(in: Input) = {
        val offset = in.offset
        val start = handleWhiteSpace(in.source, offset)
        pp(in.drop (start - offset))
      }
    }
  }

  override def phrase[T](p: Parser[T]): Parser[T] =
    super.phrase(p <~ opt("""\z""".r))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    p(in)

  /** Parse some prefix of character sequence `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    p(new CharSequenceReader(in))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    p(new PagedSeqReader(PagedSeq.fromReader(in)))

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of character sequence `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    parse(phrase(p), in)
}
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2006-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */


package scala.util.parsing.combinator

import java.util.regex.Pattern
import scala.util.matching.Regex
import scala.util.parsing.input._
import scala.collection.immutable.PagedSeq
import scala.language.implicitConversions

/** The ''most important'' differences between `RegexParsers` and
 *  [[scala.util.parsing.combinator.Parsers]] are:
 *
 *  - `Elem` is defined to be [[scala.Char]]
 *  - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`,
 *    so that string literals can be used as parser combinators.
 *  - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`,
 *    so that regex expressions can be used as parser combinators.
 *  - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true,
 *    skip any whitespace before each parser is called.
 *  - Protected val `whiteSpace` returns a regex that identifies whitespace.
 *
 *  For example, this creates a very simple calculator receiving `String` input:
 *
 *  {{{
 *  object Calculator extends RegexParsers {
 *    def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble }
 *    def factor: Parser[Double] = number | "(" ~> expr <~ ")"
 *    def term  : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ {
 *      case number ~ list => (number /: list) {
 *        case (x, "*" ~ y) => x * y
 *        case (x, "/" ~ y) => x / y
 *      }
 *    }
 *    def expr  : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ {
 *      case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /:
 *        case (x, "+" ~ y) => x + y
 *        case (x, "-" ~ y) => x - y
 *      }
 *    }
 *
 *    def apply(input: String): Double = parseAll(expr, input) match {
 *      case Success(result, _) => result
 *      case failure : NoSuccess => scala.sys.error(failure.msg)
 *    }
 *  }
 *  }}}
 */
trait RegexParsers extends Parsers {

  type Elem = Char

  protected val whiteSpace = """\s+""".r

  def skipWhitespace = whiteSpace.toString.length > 0

  /** Method called to handle whitespace before parsers.
   *
   *  It checks `skipWhitespace` and, if true, skips anything
   *  matching `whiteSpace` starting from the current offset.
   *
   *  @param source  The input being parsed.
   *  @param offset  The offset into `source` from which to match.
   *  @return        The offset to be used for the next parser.
   */
  protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
    if (skipWhitespace)
      (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match {
        case Some(matched) => offset + matched.end
        case None => offset
      }
    else
      offset

  /** A parser that matches a literal string */
  implicit def literal(s: String): Parser[String] = new Parser[String] {
    def apply(in: Input) = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      var i = 0
      var j = start
      while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) {
        i += 1
        j += 1
      }
      if (i == s.length)
        Success(source.subSequence(start, j).toString, in.drop(j - offset))
      else  {
        val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'"
        Failure("`"+s+"' expected but "+found+" found", in.drop(start - offset))
      }
    }
  }

  /** A parser that matches a regex string */
  implicit def regex(r: Regex): Parser[String] = new Parser[String] {
    def apply(in: Input) = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      (r findPrefixMatchOf (source.subSequence(start, source.length))) match {
        case Some(matched) =>
          Success(source.subSequence(start, start + matched.end).toString,
                  in.drop(start + matched.end - offset))
        case None =>
          val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'"
          Failure("string matching regex `"+r+"' expected but "+found+" found", in.drop(start - offset))
      }
    }
  }

  /** `positioned` decorates a parser's result with the start position of the input it consumed.
   * If whitespace is being skipped, then it is skipped before the start position is recorded.
   *
   * @param p a `Parser` whose result conforms to `Positional`.
   * @return A parser that has the same behaviour as `p`, but which marks its result with the
   *         start position of the input it consumed after whitespace has been skipped, if it
   *         didn't already have a position.
   */
  override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = {
    val pp = super.positioned(p)
    new Parser[T] {
      def apply(in: Input) = {
        val offset = in.offset
        val start = handleWhiteSpace(in.source, offset)
        pp(in.drop (start - offset))
      }
    }
  }

  override def phrase[T](p: Parser[T]): Parser[T] =
    super.phrase(p <~ opt("""\z""".r))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    p(in)

  /** Parse some prefix of character sequence `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    p(new CharSequenceReader(in))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    p(new PagedSeqReader(PagedSeq.fromReader(in)))

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of character sequence `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    parse(phrase(p), in)
}