aboutsummaryrefslogtreecommitdiff
path: root/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
diff options
context:
space:
mode:
authorMartin Odersky <odersky@gmail.com>2013-05-09 19:37:15 +0200
committerMartin Odersky <odersky@gmail.com>2013-05-09 19:37:15 +0200
commitcf4c428cc58ed330faa236bf54d06c1fad902c8a (patch)
treee56522e356c8ba7c84e7509c618d545e16ec19a8 /src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
parentbfa03db8ea8897f51316cd77a7c71b2ca25ba531 (diff)
downloaddotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.tar.gz
dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.tar.bz2
dotty-cf4c428cc58ed330faa236bf54d06c1fad902c8a.zip
Some parser revisions
(1) Added markup parsers (2) Syntax change relating to modifiers and annotations of primary constructor (3) Review of parsing with bug fixes and simplifications.
Diffstat (limited to 'src/dotty/tools/dotc/parsing/MarkupParserCommon.scala')
-rw-r--r--src/dotty/tools/dotc/parsing/MarkupParserCommon.scala262
1 files changed, 262 insertions, 0 deletions
diff --git a/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
new file mode 100644
index 000000000..db2fe569b
--- /dev/null
+++ b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
@@ -0,0 +1,262 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+package dotty.tools.dotc
+package parsing
+
+import scala.xml._
+import scala.xml.parsing._
+
+import scala.io.Source
+import scala.xml.dtd._
+import scala.annotation.switch
+import Utility.Escapes.{ pairs => unescape }
+
+import Utility.SU
+
+/** This is not a public trait - it contains common code shared
+ * between the library level XML parser and the compiler's.
+ * All members should be accessed through those.
+ */
+private[dotty] trait MarkupParserCommon extends TokenTests {
+ protected def unreachable = scala.sys.error("Cannot be reached.")
+
+ // type HandleType // MarkupHandler, SymbolicXMLBuilder
+ type InputType // Source, CharArrayReader
+ type PositionType // Int, Position
+ type ElementType // NodeSeq, Tree
+ type NamespaceType // NamespaceBinding, Any
+ type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+ def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+ def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+ /** parse a start or empty tag.
+ * [40] STag ::= '<' Name { S Attribute } [S]
+ * [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+ */
+ protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+ val name = xName
+ xSpaceOpt
+
+ (name, mkAttributes(name, pscope))
+ }
+
+ /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'>?' {Char})]'?>'
+ *
+ * see [15]
+ */
+ def xProcInstr: ElementType = {
+ val n = xName
+ xSpaceOpt
+ xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>")
+ }
+
+ /** attribute value, terminated by either `'` or `"`. value may not contain `<`.
+ @param endCh either `'` or `"`
+ */
+ def xAttributeValue(endCh: Char): String = {
+ val buf = new StringBuilder
+ while (ch != endCh) {
+ // well-formedness constraint
+ if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
+ else if (ch == SU) truncatedError("")
+ else buf append ch_returning_nextch
+ }
+ ch_returning_nextch
+ // @todo: normalize attribute value
+ buf.toString
+ }
+
+ def xAttributeValue(): String = {
+ val str = xAttributeValue(ch_returning_nextch)
+ // well-formedness constraint
+ normalizeAttributeValue(str)
+ }
+
+ private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+ val buf = new StringBuilder
+ while (it.hasNext) it.next match {
+ case `end` => return buf.toString
+ case ch => buf append ch
+ }
+ scala.sys.error("Expected '%s'".format(end))
+ }
+
+ /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>'
+ */
+ def xEndTag(startName: String) {
+ xToken('/')
+ if (xName != startName)
+ errorNoEnd(startName)
+
+ xSpaceOpt
+ xToken('>')
+ }
+
+ /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen
+ * Name ::= (Letter | '_') (NameChar)*
+ *
+ * see [5] of XML 1.0 specification
+ *
+ * pre-condition: ch != ':' // assured by definition of XMLSTART token
+ * post-condition: name does neither start, nor end in ':'
+ */
+ def xName: String = {
+ if (ch == SU)
+ truncatedError("")
+ else if (!isNameStart(ch))
+ return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+
+ val buf = new StringBuilder
+
+ do buf append ch_returning_nextch
+ while (isNameChar(ch))
+
+ if (buf.last == ':') {
+ reportSyntaxError( "name cannot end in ':'" )
+ buf.toString dropRight 1
+ }
+ else buf.toString
+ }
+
+ private def attr_unescape(s: String) = s match {
+ case "lt" => "<"
+ case "gt" => ">"
+ case "amp" => "&"
+ case "apos" => "'"
+ case "quot" => "\""
+ case "quote" => "\""
+ case _ => "&" + s + ";"
+ }
+
+ /** Replaces only character references right now.
+ * see spec 3.3.3
+ */
+ private def normalizeAttributeValue(attval: String): String = {
+ val buf = new StringBuilder
+ val it = attval.iterator.buffered
+
+ while (it.hasNext) buf append (it.next match {
+ case ' ' | '\t' | '\n' | '\r' => " "
+ case '&' if it.head == '#' => it.next ; xCharRef(it)
+ case '&' => attr_unescape(takeUntilChar(it, ';'))
+ case c => c
+ })
+
+ buf.toString
+ }
+
+ /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+ * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+ *
+ * see [66]
+ */
+ def xCharRef(ch: () => Char, nextch: () => Unit): String =
+ Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
+
+ def xCharRef(it: Iterator[Char]): String = {
+ var c = it.next
+ Utility.parseCharRef(() => c, () => { c = it.next }, reportSyntaxError _, truncatedError _)
+ }
+
+ def xCharRef: String = xCharRef(() => ch, () => nextch)
+
+ /** Create a lookahead reader which does not influence the input */
+ def lookahead(): BufferedIterator[Char]
+
+ /** The library and compiler parsers had the interesting distinction of
+ * different behavior for nextch (a function for which there are a total
+ * of two plausible behaviors, so we know the design space was fully
+ * explored.) One of them returned the value of nextch before the increment
+ * and one of them the new value. So to unify code we have to at least
+ * temporarily abstract over the nextchs.
+ */
+ def ch: Char
+ def nextch(): Unit
+ protected def ch_returning_nextch: Char
+ def eof: Boolean
+
+ // def handle: HandleType
+ var tmppos: PositionType
+
+ def xHandleError(that: Char, msg: String): Unit
+ def reportSyntaxError(str: String): Unit
+ def reportSyntaxError(pos: Int, str: String): Unit
+
+ def truncatedError(msg: String): Nothing
+ def errorNoEnd(tag: String): Nothing
+
+ protected def errorAndResult[T](msg: String, x: T): T = {
+ reportSyntaxError(msg)
+ x
+ }
+
+ def xToken(that: Char) {
+ if (ch == that) nextch
+ else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+ }
+ def xToken(that: Seq[Char]) { that foreach xToken }
+
+ /** scan [S] '=' [S]*/
+ def xEQ() = { xSpaceOpt; xToken('='); xSpaceOpt }
+
+ /** skip optional space S? */
+ def xSpaceOpt() = while (isSpace(ch) && !eof) nextch
+
+ /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
+ def xSpace() =
+ if (isSpace(ch)) { nextch; xSpaceOpt }
+ else xHandleError(ch, "whitespace expected")
+
+ /** Apply a function and return the passed value */
+ def returning[T](x: T)(f: T => Unit): T = { f(x); x }
+
+ /** Execute body with a variable saved and restored after execution */
+ def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+ val saved = getter
+ try body
+ finally setter(saved)
+ }
+
+ /** Take characters from input stream until given String "until"
+ * is seen. Once seen, the accumulated characters are passed
+ * along with the current Position to the supplied handler function.
+ */
+ protected def xTakeUntil[T](
+ handler: (PositionType, String) => T,
+ positioner: () => PositionType,
+ until: String): T =
+ {
+ val sb = new StringBuilder
+ val head = until.head
+ val rest = until.tail
+
+ while (true) {
+ if (ch == head && peek(rest))
+ return handler(positioner(), sb.toString)
+ else if (ch == SU)
+ truncatedError("") // throws TruncatedXMLControl in compiler
+
+ sb append ch
+ nextch
+ }
+ unreachable
+ }
+
+ /** Create a non-destructive lookahead reader and see if the head
+ * of the input would match the given String. If yes, return true
+ * and drop the entire String from input; if no, return false
+ * and leave input unchanged.
+ */
+ private def peek(lookingFor: String): Boolean =
+ (lookahead() take lookingFor.length sameElements lookingFor.iterator) && {
+ // drop the chars from the real reader (all lookahead + orig)
+ (0 to lookingFor.length) foreach (_ => nextch)
+ true
+ }
+}