From cf4c428cc58ed330faa236bf54d06c1fad902c8a Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Thu, 9 May 2013 19:37:15 +0200 Subject: Some parser revisions (1) Added markup parsers (2) Syntax change relating to modifiers and annotations of primary constructor (3) Review of parsing with bug fixes and simplifications. --- .../tools/dotc/parsing/MarkupParserCommon.scala | 262 +++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 src/dotty/tools/dotc/parsing/MarkupParserCommon.scala (limited to 'src/dotty/tools/dotc/parsing/MarkupParserCommon.scala') diff --git a/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala new file mode 100644 index 000000000..db2fe569b --- /dev/null +++ b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala @@ -0,0 +1,262 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ +package dotty.tools.dotc +package parsing + +import scala.xml._ +import scala.xml.parsing._ + +import scala.io.Source +import scala.xml.dtd._ +import scala.annotation.switch +import Utility.Escapes.{ pairs => unescape } + +import Utility.SU + +/** This is not a public trait - it contains common code shared + * between the library level XML parser and the compiler's. + * All members should be accessed through those. + */ +private[dotty] trait MarkupParserCommon extends TokenTests { + protected def unreachable = scala.sys.error("Cannot be reached.") + + // type HandleType // MarkupHandler, SymbolicXMLBuilder + type InputType // Source, CharArrayReader + type PositionType // Int, Position + type ElementType // NodeSeq, Tree + type NamespaceType // NamespaceBinding, Any + type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree] + + def mkAttributes(name: String, pscope: NamespaceType): AttributesType + def mkProcInstr(position: PositionType, name: String, text: String): ElementType + + /** parse a start or empty tag. + * [40] STag ::= '<' Name { S Attribute } [S] + * [44] EmptyElemTag ::= '<' Name { S Attribute } [S] + */ + protected def xTag(pscope: NamespaceType): (String, AttributesType) = { + val name = xName + xSpaceOpt + + (name, mkAttributes(name, pscope)) + } + + /** '?' {Char})]'?>' + * + * see [15] + */ + def xProcInstr: ElementType = { + val n = xName + xSpaceOpt + xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>") + } + + /** attribute value, terminated by either `'` or `"`. value may not contain `<`. + @param endCh either `'` or `"` + */ + def xAttributeValue(endCh: Char): String = { + val buf = new StringBuilder + while (ch != endCh) { + // well-formedness constraint + if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "") + else if (ch == SU) truncatedError("") + else buf append ch_returning_nextch + } + ch_returning_nextch + // @todo: normalize attribute value + buf.toString + } + + def xAttributeValue(): String = { + val str = xAttributeValue(ch_returning_nextch) + // well-formedness constraint + normalizeAttributeValue(str) + } + + private def takeUntilChar(it: Iterator[Char], end: Char): String = { + val buf = new StringBuilder + while (it.hasNext) it.next match { + case `end` => return buf.toString + case ch => buf append ch + } + scala.sys.error("Expected '%s'".format(end)) + } + + /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>' + */ + def xEndTag(startName: String) { + xToken('/') + if (xName != startName) + errorNoEnd(startName) + + xSpaceOpt + xToken('>') + } + + /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen + * Name ::= (Letter | '_') (NameChar)* + * + * see [5] of XML 1.0 specification + * + * pre-condition: ch != ':' // assured by definition of XMLSTART token + * post-condition: name does neither start, nor end in ':' + */ + def xName: String = { + if (ch == SU) + truncatedError("") + else if (!isNameStart(ch)) + return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "") + + val buf = new StringBuilder + + do buf append ch_returning_nextch + while (isNameChar(ch)) + + if (buf.last == ':') { + reportSyntaxError( "name cannot end in ':'" ) + buf.toString dropRight 1 + } + else buf.toString + } + + private def attr_unescape(s: String) = s match { + case "lt" => "<" + case "gt" => ">" + case "amp" => "&" + case "apos" => "'" + case "quot" => "\"" + case "quote" => "\"" + case _ => "&" + s + ";" + } + + /** Replaces only character references right now. + * see spec 3.3.3 + */ + private def normalizeAttributeValue(attval: String): String = { + val buf = new StringBuilder + val it = attval.iterator.buffered + + while (it.hasNext) buf append (it.next match { + case ' ' | '\t' | '\n' | '\r' => " " + case '&' if it.head == '#' => it.next ; xCharRef(it) + case '&' => attr_unescape(takeUntilChar(it, ';')) + case c => c + }) + + buf.toString + } + + /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" + * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" + * + * see [66] + */ + def xCharRef(ch: () => Char, nextch: () => Unit): String = + Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _) + + def xCharRef(it: Iterator[Char]): String = { + var c = it.next + Utility.parseCharRef(() => c, () => { c = it.next }, reportSyntaxError _, truncatedError _) + } + + def xCharRef: String = xCharRef(() => ch, () => nextch) + + /** Create a lookahead reader which does not influence the input */ + def lookahead(): BufferedIterator[Char] + + /** The library and compiler parsers had the interesting distinction of + * different behavior for nextch (a function for which there are a total + * of two plausible behaviors, so we know the design space was fully + * explored.) One of them returned the value of nextch before the increment + * and one of them the new value. So to unify code we have to at least + * temporarily abstract over the nextchs. + */ + def ch: Char + def nextch(): Unit + protected def ch_returning_nextch: Char + def eof: Boolean + + // def handle: HandleType + var tmppos: PositionType + + def xHandleError(that: Char, msg: String): Unit + def reportSyntaxError(str: String): Unit + def reportSyntaxError(pos: Int, str: String): Unit + + def truncatedError(msg: String): Nothing + def errorNoEnd(tag: String): Nothing + + protected def errorAndResult[T](msg: String, x: T): T = { + reportSyntaxError(msg) + x + } + + def xToken(that: Char) { + if (ch == that) nextch + else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch)) + } + def xToken(that: Seq[Char]) { that foreach xToken } + + /** scan [S] '=' [S]*/ + def xEQ() = { xSpaceOpt; xToken('='); xSpaceOpt } + + /** skip optional space S? */ + def xSpaceOpt() = while (isSpace(ch) && !eof) nextch + + /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ + def xSpace() = + if (isSpace(ch)) { nextch; xSpaceOpt } + else xHandleError(ch, "whitespace expected") + + /** Apply a function and return the passed value */ + def returning[T](x: T)(f: T => Unit): T = { f(x); x } + + /** Execute body with a variable saved and restored after execution */ + def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = { + val saved = getter + try body + finally setter(saved) + } + + /** Take characters from input stream until given String "until" + * is seen. Once seen, the accumulated characters are passed + * along with the current Position to the supplied handler function. + */ + protected def xTakeUntil[T]( + handler: (PositionType, String) => T, + positioner: () => PositionType, + until: String): T = + { + val sb = new StringBuilder + val head = until.head + val rest = until.tail + + while (true) { + if (ch == head && peek(rest)) + return handler(positioner(), sb.toString) + else if (ch == SU) + truncatedError("") // throws TruncatedXMLControl in compiler + + sb append ch + nextch + } + unreachable + } + + /** Create a non-destructive lookahead reader and see if the head + * of the input would match the given String. If yes, return true + * and drop the entire String from input; if no, return false + * and leave input unchanged. + */ + private def peek(lookingFor: String): Boolean = + (lookahead() take lookingFor.length sameElements lookingFor.iterator) && { + // drop the chars from the real reader (all lookahead + orig) + (0 to lookingFor.length) foreach (_ => nextch) + true + } +} -- cgit v1.2.3