aboutsummaryrefslogtreecommitdiff
path: root/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
diff options
context:
space:
mode:
Diffstat (limited to 'src/dotty/tools/dotc/parsing/MarkupParserCommon.scala')
-rw-r--r--src/dotty/tools/dotc/parsing/MarkupParserCommon.scala262
1 files changed, 262 insertions, 0 deletions
diff --git a/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
new file mode 100644
index 000000000..db2fe569b
--- /dev/null
+++ b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
@@ -0,0 +1,262 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+package dotty.tools.dotc
+package parsing
+
+import scala.xml._
+import scala.xml.parsing._
+
+import scala.io.Source
+import scala.xml.dtd._
+import scala.annotation.switch
+import Utility.Escapes.{ pairs => unescape }
+
+import Utility.SU
+
+/** This is not a public trait - it contains common code shared
+ * between the library level XML parser and the compiler's.
+ * All members should be accessed through those.
+ */
+private[dotty] trait MarkupParserCommon extends TokenTests {
+ protected def unreachable = scala.sys.error("Cannot be reached.")
+
+ // type HandleType // MarkupHandler, SymbolicXMLBuilder
+ type InputType // Source, CharArrayReader
+ type PositionType // Int, Position
+ type ElementType // NodeSeq, Tree
+ type NamespaceType // NamespaceBinding, Any
+ type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+ def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+ def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+ /** parse a start or empty tag.
+ * [40] STag ::= '<' Name { S Attribute } [S]
+ * [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+ */
+ protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+ val name = xName
+ xSpaceOpt
+
+ (name, mkAttributes(name, pscope))
+ }
+
+ /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'>?' {Char})]'?>'
+ *
+ * see [15]
+ */
+ def xProcInstr: ElementType = {
+ val n = xName
+ xSpaceOpt
+ xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>")
+ }
+
+ /** attribute value, terminated by either `'` or `"`. value may not contain `<`.
+ @param endCh either `'` or `"`
+ */
+ def xAttributeValue(endCh: Char): String = {
+ val buf = new StringBuilder
+ while (ch != endCh) {
+ // well-formedness constraint
+ if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
+ else if (ch == SU) truncatedError("")
+ else buf append ch_returning_nextch
+ }
+ ch_returning_nextch
+ // @todo: normalize attribute value
+ buf.toString
+ }
+
+ def xAttributeValue(): String = {
+ val str = xAttributeValue(ch_returning_nextch)
+ // well-formedness constraint
+ normalizeAttributeValue(str)
+ }
+
+ private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+ val buf = new StringBuilder
+ while (it.hasNext) it.next match {
+ case `end` => return buf.toString
+ case ch => buf append ch
+ }
+ scala.sys.error("Expected '%s'".format(end))
+ }
+
+ /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>'
+ */
+ def xEndTag(startName: String) {
+ xToken('/')
+ if (xName != startName)
+ errorNoEnd(startName)
+
+ xSpaceOpt
+ xToken('>')
+ }
+
+ /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen
+ * Name ::= (Letter | '_') (NameChar)*
+ *
+ * see [5] of XML 1.0 specification
+ *
+ * pre-condition: ch != ':' // assured by definition of XMLSTART token
+ * post-condition: name does neither start, nor end in ':'
+ */
+ def xName: String = {
+ if (ch == SU)
+ truncatedError("")
+ else if (!isNameStart(ch))
+ return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+
+ val buf = new StringBuilder
+
+ do buf append ch_returning_nextch
+ while (isNameChar(ch))
+
+ if (buf.last == ':') {
+ reportSyntaxError( "name cannot end in ':'" )
+ buf.toString dropRight 1
+ }
+ else buf.toString
+ }
+
+ private def attr_unescape(s: String) = s match {
+ case "lt" => "<"
+ case "gt" => ">"
+ case "amp" => "&"
+ case "apos" => "'"
+ case "quot" => "\""
+ case "quote" => "\""
+ case _ => "&" + s + ";"
+ }
+
+ /** Replaces only character references right now.
+ * see spec 3.3.3
+ */
+ private def normalizeAttributeValue(attval: String): String = {
+ val buf = new StringBuilder
+ val it = attval.iterator.buffered
+
+ while (it.hasNext) buf append (it.next match {
+ case ' ' | '\t' | '\n' | '\r' => " "
+ case '&' if it.head == '#' => it.next ; xCharRef(it)
+ case '&' => attr_unescape(takeUntilChar(it, ';'))
+ case c => c
+ })
+
+ buf.toString
+ }
+
+ /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+ * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+ *
+ * see [66]
+ */
+ def xCharRef(ch: () => Char, nextch: () => Unit): String =
+ Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
+
+ def xCharRef(it: Iterator[Char]): String = {
+ var c = it.next
+ Utility.parseCharRef(() => c, () => { c = it.next }, reportSyntaxError _, truncatedError _)
+ }
+
+ def xCharRef: String = xCharRef(() => ch, () => nextch)
+
+ /** Create a lookahead reader which does not influence the input */
+ def lookahead(): BufferedIterator[Char]
+
+ /** The library and compiler parsers had the interesting distinction of
+ * different behavior for nextch (a function for which there are a total
+ * of two plausible behaviors, so we know the design space was fully
+ * explored.) One of them returned the value of nextch before the increment
+ * and one of them the new value. So to unify code we have to at least
+ * temporarily abstract over the nextchs.
+ */
+ def ch: Char
+ def nextch(): Unit
+ protected def ch_returning_nextch: Char
+ def eof: Boolean
+
+ // def handle: HandleType
+ var tmppos: PositionType
+
+ def xHandleError(that: Char, msg: String): Unit
+ def reportSyntaxError(str: String): Unit
+ def reportSyntaxError(pos: Int, str: String): Unit
+
+ def truncatedError(msg: String): Nothing
+ def errorNoEnd(tag: String): Nothing
+
+ protected def errorAndResult[T](msg: String, x: T): T = {
+ reportSyntaxError(msg)
+ x
+ }
+
+ def xToken(that: Char) {
+ if (ch == that) nextch
+ else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+ }
+ def xToken(that: Seq[Char]) { that foreach xToken }
+
+ /** scan [S] '=' [S]*/
+ def xEQ() = { xSpaceOpt; xToken('='); xSpaceOpt }
+
+ /** skip optional space S? */
+ def xSpaceOpt() = while (isSpace(ch) && !eof) nextch
+
+ /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
+ def xSpace() =
+ if (isSpace(ch)) { nextch; xSpaceOpt }
+ else xHandleError(ch, "whitespace expected")
+
+ /** Apply a function and return the passed value */
+ def returning[T](x: T)(f: T => Unit): T = { f(x); x }
+
+ /** Execute body with a variable saved and restored after execution */
+ def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+ val saved = getter
+ try body
+ finally setter(saved)
+ }
+
+ /** Take characters from input stream until given String "until"
+ * is seen. Once seen, the accumulated characters are passed
+ * along with the current Position to the supplied handler function.
+ */
+ protected def xTakeUntil[T](
+ handler: (PositionType, String) => T,
+ positioner: () => PositionType,
+ until: String): T =
+ {
+ val sb = new StringBuilder
+ val head = until.head
+ val rest = until.tail
+
+ while (true) {
+ if (ch == head && peek(rest))
+ return handler(positioner(), sb.toString)
+ else if (ch == SU)
+ truncatedError("") // throws TruncatedXMLControl in compiler
+
+ sb append ch
+ nextch
+ }
+ unreachable
+ }
+
+ /** Create a non-destructive lookahead reader and see if the head
+ * of the input would match the given String. If yes, return true
+ * and drop the entire String from input; if no, return false
+ * and leave input unchanged.
+ */
+ private def peek(lookingFor: String): Boolean =
+ (lookahead() take lookingFor.length sameElements lookingFor.iterator) && {
+ // drop the chars from the real reader (all lookahead + orig)
+ (0 to lookingFor.length) foreach (_ => nextch)
+ true
+ }
+}