Some parser revisions

(1) Added markup parsers (2) Syntax change relating to modifiers and annotations of primary constructor (3) Review of parsing with bug fixes and simplifications.
author: Martin Odersky <odersky@gmail.com> 2013-05-09 19:37:15 +0200
committer: Martin Odersky <odersky@gmail.com> 2013-05-09 19:37:15 +0200
commit: cf4c428cc58ed330faa236bf54d06c1fad902c8a (patch)
tree: e56522e356c8ba7c84e7509c618d545e16ec19a8 /src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
parent: bfa03db8ea8897f51316cd77a7c71b2ca25ba531 (diff)
download: dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.tar.gz
dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.tar.bz2
dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.zip
1 files changed, 262 insertions, 0 deletions
diff --git a/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
new file mode 100644
index 000000000..db2fe569b
--- /dev/null
+++ b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
@@ -0,0 +1,262 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+package dotty.tools.dotc
+package parsing
+
+import scala.xml._
+import scala.xml.parsing._
+
+import scala.io.Source
+import scala.xml.dtd._
+import scala.annotation.switch
+import Utility.Escapes.{ pairs => unescape }
+
+import Utility.SU
+
+/** This is not a public trait - it contains common code shared
+ *  between the library level XML parser and the compiler's.
+ *  All members should be accessed through those.
+ */
+private[dotty] trait MarkupParserCommon extends TokenTests {
+  /** Placeholder result for positions that control flow never reaches;
+   *  evaluating it throws via `scala.sys.error`.
+   */
+  protected def unreachable = scala.sys.error("Cannot be reached.")
+
+  // Abstract type members, to be bound by each concrete parser. The trailing
+  // comments list the bindings the original author had in mind.
+  // type HandleType       // MarkupHandler, SymbolicXMLBuilder
+  type InputType        // Source, CharArrayReader
+  type PositionType     // Int, Position
+  type ElementType      // NodeSeq, Tree
+  type NamespaceType    // NamespaceBinding, Any
+  type AttributesType   // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+  /** Produces the attributes of the element called `name` within scope `pscope`.
+   *  NOTE(review): presumably this parses the attributes from the input, since
+   *  `xTag` calls it right after the element name -- confirm in implementations.
+   */
+  def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+  /** Builds the result for a processing instruction from its position,
+   *  its name and its text; `xProcInstr` calls it once the closing "?>" is seen.
+   */
+  def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+  /** Parses the part shared by start tags and empty-element tags: the
+   *  element name and optional whitespace, then delegates to `mkAttributes`.
+   *  {{{
+   *  [40] STag         ::= '<' Name { S Attribute } [S]
+   *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+   *  }}}
+   *  @param pscope namespace scope handed on to `mkAttributes`
+   *  @return the element name paired with the result of `mkAttributes`
+   */
+  protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+    val tagName = xName
+    xSpaceOpt
+    val attributes = mkAttributes(tagName, pscope)
+    (tagName, attributes)
+  }
+
+  /** Parses a processing instruction; parsing starts at its name, so the
+   *  opening `<?` must already have been consumed.
+   *  {{{
+   *  '<?' ProcInstr ::= Name [S ({Char} - ({Char} '?>' {Char}))] '?>'
+   *  }}}
+   *  see [16]
+   *
+   *  @return the result of `mkProcInstr` for the name and the text up to "?>"
+   */
+  def xProcInstr: ElementType = {
+    val target = xName
+    xSpaceOpt
+    xTakeUntil((pos, text) => mkProcInstr(pos, target, text), () => tmppos, "?>")
+  }
+
+  /** Reads an attribute value up to and including the closing quote `endCh`.
+   *  The value may not contain `<`: on meeting one, a syntax error is reported
+   *  and the empty string is returned without consuming further input.
+   *
+   *  @param endCh the closing delimiter, either `'` or `"`
+   *  @return the raw text before the closing quote (not yet normalized)
+   */
+  def xAttributeValue(endCh: Char): String = {
+    val value = new StringBuilder
+    while (ch != endCh) ch match {
+      // well-formedness constraint
+      case '<' => return errorAndResult("'<' not allowed in attrib value", "")
+      case SU  => truncatedError("")
+      case _   => value append ch_returning_nextch
+    }
+    // step over the closing quote
+    ch_returning_nextch
+    // @todo: normalize attribute value
+    value.toString
+  }
+
+  /** Reads a complete quoted attribute value: the character yielded by
+   *  `ch_returning_nextch` is taken as the quote, the text up to the matching
+   *  quote is read, and the result is passed through `normalizeAttributeValue`.
+   */
+  def xAttributeValue(): String = {
+    val quote = ch_returning_nextch
+    val raw = xAttributeValue(quote)
+    normalizeAttributeValue(raw)
+  }
+
+  /** Collects characters from `it` up to, but excluding, the first `end`.
+   *  The `end` character itself is consumed from the iterator.
+   *
+   *  @throws RuntimeException (via `sys.error`) if `it` runs out before `end`
+   */
+  private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+    val collected = new StringBuilder
+    while (it.hasNext) {
+      val c = it.next
+      if (c == end) return collected.toString
+      collected append c
+    }
+    scala.sys.error("Expected '%s'".format(end))
+  }
+
+  /** Parses an end tag starting at its `/`, so the `<` must already have
+   *  been consumed.
+   *  {{{
+   *  [42]  '<' xmlEndTag ::=  '<' '/' Name S? '>'
+   *  }}}
+   *  @param startName name of the element being closed; a different name in
+   *                   the end tag is reported through `errorNoEnd`
+   */
+  def xEndTag(startName: String): Unit = {
+    xToken('/')
+    val closingName = xName
+    if (closingName != startName) errorNoEnd(startName)
+
+    xSpaceOpt
+    xToken('>')
+  }
+
+  /** Parses an XML name.
+   *  {{{
+   *  Name ::= (Letter | '_') (NameChar)*
+   *  }}}
+   *  The specification (see [5] of XML 1.0) also lets a name start with ':',
+   *  but that cannot happen here.
+   *
+   *  pre-condition:  ch != ':' // assured by definition of XMLSTART token
+   *  post-condition: the name neither starts nor ends with ':'
+   *
+   *  @return the name; a trailing ':' is reported and dropped, and the empty
+   *          string is returned (after reporting an error) when the current
+   *          character cannot start a name
+   */
+  def xName: String = {
+    // truncatedError has result type Nothing, so nothing below runs after it
+    if (ch == SU) truncatedError("")
+    if (!isNameStart(ch))
+      return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+
+    val chars = new StringBuilder
+    chars append ch_returning_nextch
+    while (isNameChar(ch)) chars append ch_returning_nextch
+
+    val name = chars.toString
+    if (name endsWith ":") {
+      reportSyntaxError( "name cannot end in ':'" )
+      name dropRight 1
+    }
+    else name
+  }
+
+  /** Expands the body of an entity reference (the text between `&` and `;`).
+   *  The names listed below yield their character; any other name is put
+   *  back unchanged as `&name;`.
+   */
+  private def attr_unescape(s: String) = s match {
+    case "quot" | "quote" => "\""
+    case "apos"           => "'"
+    case "amp"            => "&"
+    case "lt"             => "<"
+    case "gt"             => ">"
+    case other            => "&" + other + ";"
+  }
+
+  /** Normalizes a raw attribute value (see spec 3.3.3): each white space
+   *  character becomes a single space, character references (`&#...;`) are
+   *  replaced through `xCharRef`, and entity references (`&name;`) are
+   *  expanded through `attr_unescape`.
+   *
+   *  @param attval the attribute text as read between the quotes
+   *  @return the normalized value
+   */
+  private def normalizeAttributeValue(attval: String): String = {
+    val buf = new StringBuilder
+    val it = attval.iterator.buffered
+
+    while (it.hasNext) buf append (it.next match {
+      case ' ' | '\t' | '\n' | '\r' => " "
+      // `head` throws on an exhausted iterator, so a '&' that ends the value
+      // must not reach it; such a '&' falls through to the entity case below,
+      // where takeUntilChar raises its "Expected ';'" error.
+      case '&' if it.hasNext && it.head == '#' => it.next ; xCharRef(it)
+      case '&'                      => attr_unescape(takeUntilChar(it, ';'))
+      case c                        => c
+    })
+
+    buf.toString
+  }
+
+  /** Parses a character reference by delegating to `Utility.parseCharRef`,
+   *  wiring in this parser's error reporting.
+   *  {{{
+   *  CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+   *            | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+   *  }}}
+   *  see [66]
+   *
+   *  NOTE(review): the leading "&#" is presumably consumed by the caller, as
+   *  `normalizeAttributeValue` does for the iterator overload -- confirm.
+   *
+   *  @param ch     yields the current character
+   *  @param nextch advances to the next character
+   */
+  def xCharRef(ch: () => Char, nextch: () => Unit): String =
+    Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
+
+  /** Parses a character reference from an iterator positioned just after "&#".
+   *  If the iterator runs dry before the closing ';', a syntax error is
+   *  reported and a ';' is supplied in its place, so the reference ends
+   *  cleanly instead of failing inside `Iterator.next`.
+   */
+  def xCharRef(it: Iterator[Char]): String = {
+    // Next character of the reference, or a synthetic terminator once exhausted.
+    def advance(): Char =
+      if (it.hasNext) it.next
+      else {
+        reportSyntaxError("character reference not terminated by ';'")
+        ';'
+      }
+    var c = advance()
+    Utility.parseCharRef(() => c, () => { c = advance() }, reportSyntaxError _, truncatedError _)
+  }
+
+  /** Parses a character reference from the parser's own input (`ch`/`nextch`). */
+  def xCharRef: String = xCharRef(() => ch, () => nextch)
+
+  /** Create a lookahead reader which does not influence the input */
+  def lookahead(): BufferedIterator[Char]
+
+  /** The library and compiler parsers had the interesting distinction of
+   *  different behavior for nextch (a function for which there are a total
+   *  of two plausible behaviors, so we know the design space was fully
+   *  explored.) One of them returned the value of nextch before the increment
+   *  and one of them the new value.  So to unify code we have to at least
+   *  temporarily abstract over the nextchs.
+   */
+  // The character the parser is currently looking at.
+  def ch: Char
+  // Advances the input; returns nothing, so callers re-read `ch` afterwards.
+  def nextch(): Unit
+  // Advances the input and yields a character. NOTE(review): callers in this
+  // trait append the result as if it were the character that was current
+  // before advancing -- confirm in the implementations.
+  protected def ch_returning_nextch: Char
+  // Checked by xSpaceOpt so that whitespace skipping stops at end of input.
+  def eof: Boolean
+
+  // def handle: HandleType
+  // Position that xProcInstr passes on to mkProcInstr.
+  var tmppos: PositionType
+
+  // Called by xToken and xSpace when the current character is not the expected one.
+  def xHandleError(that: Char, msg: String): Unit
+  // Report a syntax error; the result type is Unit, so parsing continues afterwards.
+  def reportSyntaxError(str: String): Unit
+  def reportSyntaxError(pos: Int, str: String): Unit
+
+  // Both have result type Nothing, i.e. they never return normally.
+  // truncatedError is used when the end-of-input marker SU is met mid-construct;
+  // errorNoEnd is used by xEndTag when the end tag's name does not match.
+  def truncatedError(msg: String): Nothing
+  def errorNoEnd(tag: String): Nothing
+
+  /** Reports `msg` as a syntax error and then yields `x`, so a parser method
+   *  can flag a problem and return a fallback value in a single expression.
+   */
+  protected def errorAndResult[T](msg: String, x: T): T = { reportSyntaxError(msg); x }
+
+  /** Consumes the character `that` if it is the current one; otherwise hands
+   *  the mismatch to `xHandleError` without advancing here.
+   */
+  def xToken(that: Char): Unit =
+    if (ch != that) xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+    else nextch()
+
+  /** Consumes every character of `that`, in order, via `xToken(Char)`. */
+  def xToken(that: Seq[Char]): Unit = for (c <- that) xToken(c)
+
+  /** scan [S] '=' [S] */
+  def xEQ(): Unit = {
+    xSpaceOpt()
+    xToken('=')
+    xSpaceOpt()
+  }
+
+  /** skip optional space S?; stops at the first non-space character or at eof */
+  def xSpaceOpt(): Unit = {
+    while (isSpace(ch) && !eof) nextch()
+  }
+
+  /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+
+   *  A missing first space character is passed to `xHandleError`.
+   */
+  def xSpace(): Unit =
+    if (!isSpace(ch)) xHandleError(ch, "whitespace expected")
+    else {
+      nextch()
+      xSpaceOpt()
+    }
+
+  /** Runs the side-effecting `f` on `x`, then hands `x` back unchanged. */
+  def returning[T](x: T)(f: T => Unit): T = {
+    f(x)
+    x
+  }
+
+  /** Evaluates `body` and afterwards restores a variable to the value it had
+   *  on entry, even if `body` throws.
+   *
+   *  @param getter the variable's value, captured when `saving` is called
+   *  @param setter writes a value back into the variable
+   *  @param body   the computation to run in between
+   *  @return the result of `body`
+   */
+  def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+    val original = getter
+    try {
+      body
+    } finally {
+      setter(original)
+    }
+  }
+
+  /** Take characters from input stream until given String "until"
+   *  is seen.  Once seen, the accumulated characters are passed
+   *  along with the current Position to the supplied handler function.
+   *
+   *  The terminator itself is consumed (by `peek`) and is not part of the
+   *  accumulated text.
+   *
+   *  @param handler    builds the result from the position and the text read
+   *  @param positioner supplies the position; it is called only after the
+   *                    terminator has been matched
+   *  @param until      the terminator; must be non-empty because `until.head`
+   *                    is taken unconditionally
+   *  @return whatever `handler` returns
+   */
+  protected def xTakeUntil[T](
+    handler: (PositionType, String) => T,
+    positioner: () => PositionType,
+    until: String): T =
+  {
+    val sb = new StringBuilder
+    // Compare the first terminator character against `ch`, the rest via lookahead.
+    val head = until.head
+    val rest = until.tail
+
+    while (true) {
+      // peek has a side effect: on a match it drops the whole terminator from the input.
+      if (ch == head && peek(rest))
+        return handler(positioner(), sb.toString)
+      else if (ch == SU)
+        truncatedError("")  // throws TruncatedXMLControl in compiler
+
+      sb append ch
+      nextch
+    }
+    // Only here to give the method a result after the infinite loop.
+    unreachable
+  }
+
+  /** Checks, through a non-destructive lookahead reader, whether the
+   *  lookahead starts with `lookingFor`.  On a match, `lookingFor.length + 1`
+   *  characters are dropped from the real input (the current character plus
+   *  the matched lookahead) and true is returned; otherwise the input is left
+   *  unchanged and false is returned.
+   */
+  private def peek(lookingFor: String): Boolean = {
+    val matched = lookahead() take lookingFor.length sameElements lookingFor.iterator
+    if (matched) {
+      // drop the chars from the real reader (all lookahead + orig)
+      for (_ <- 0 to lookingFor.length) nextch()
+    }
+    matched
+  }
+}
author	Martin Odersky <odersky@gmail.com>	2013-05-09 19:37:15 +0200
committer	Martin Odersky <odersky@gmail.com>	2013-05-09 19:37:15 +0200
commit	cf4c428cc58ed330faa236bf54d06c1fad902c8a (patch)
tree	e56522e356c8ba7c84e7509c618d545e16ec19a8 /src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
parent	bfa03db8ea8897f51316cd77a7c71b2ca25ba531 (diff)
download	dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.tar.gz dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.tar.bz2 dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.zip