aboutsummaryrefslogtreecommitdiff
path: root/compiler/src/dotty/tools/dotc/parsing/MarkupParsers.scala
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/src/dotty/tools/dotc/parsing/MarkupParsers.scala')
-rw-r--r--compiler/src/dotty/tools/dotc/parsing/MarkupParsers.scala466
1 files changed, 466 insertions, 0 deletions
diff --git a/compiler/src/dotty/tools/dotc/parsing/MarkupParsers.scala b/compiler/src/dotty/tools/dotc/parsing/MarkupParsers.scala
new file mode 100644
index 000000000..f648b9e2c
--- /dev/null
+++ b/compiler/src/dotty/tools/dotc/parsing/MarkupParsers.scala
@@ -0,0 +1,466 @@
+package dotty.tools
+package dotc
+package parsing
+
+import scala.collection.mutable
+import mutable.{ Buffer, ArrayBuffer, ListBuffer }
+import scala.util.control.ControlThrowable
+import scala.reflect.internal.Chars.SU
+import Parsers._
+import util.Positions._
+import core._
+import Constants._
+import Utility._
+
+
+// XXX/Note: many/most of the functions in here are almost direct cut and pastes
+// from another file - scala.xml.parsing.MarkupParser, it looks like.
+// (It was like that when I got here.) They used to be commented "[Duplicate]" butx
+// since approximately all of them were, I snipped it as noise. As far as I can
+// tell this wasn't for any particularly good reason, but slightly different
+// compiler and library parser interfaces meant it would take some setup.
+//
+// I rewrote most of these, but not as yet the library versions: so if you are
+// tempted to touch any of these, please be aware of that situation and try not
+// to let it get any worse. -- paulp
+
+/** This trait ...
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+object MarkupParsers {
+
+ import ast.untpd._
+
+ case object MissingEndTagControl extends ControlThrowable {
+ override def getMessage = "start tag was here: "
+ }
+
+ case object ConfusedAboutBracesControl extends ControlThrowable {
+ override def getMessage = " I encountered a '}' where I didn't expect one, maybe this tag isn't closed <"
+ }
+
+ case object TruncatedXMLControl extends ControlThrowable {
+ override def getMessage = "input ended while parsing XML"
+ }
+
+ class MarkupParser(parser: Parser, final val preserveWS: Boolean) extends MarkupParserCommon {
+
+ import Tokens.{ LBRACE, RBRACE }
+
+ type PositionType = Position
+ type InputType = CharArrayReader
+ type ElementType = Tree
+ type AttributesType = mutable.Map[String, Tree]
+ type NamespaceType = Any // namespaces ignored
+
+ def mkAttributes(name: String, other: NamespaceType): AttributesType = xAttributes
+
+ val eof = false
+
+ def truncatedError(msg: String): Nothing = throw TruncatedXMLControl
+ def xHandleError(that: Char, msg: String) =
+ if (ch == SU) throw TruncatedXMLControl
+ else reportSyntaxError(msg)
+
+ var input : CharArrayReader = _
+ def lookahead(): BufferedIterator[Char] =
+ (input.buf drop input.charOffset).iterator.buffered
+
+ import parser.{ symbXMLBuilder => handle }
+
+ def curOffset : Int = input.charOffset - 1
+ var tmppos : Position = NoPosition
+ def ch = input.ch
+ /** this method assign the next character to ch and advances in input */
+ def nextch(): Unit = { input.nextChar() }
+
+ protected def ch_returning_nextch: Char = {
+ val result = ch; input.nextChar(); result
+ }
+
+ def mkProcInstr(position: Position, name: String, text: String): ElementType =
+ parser.symbXMLBuilder.procInstr(position, name, text)
+
+ var xEmbeddedBlock = false
+
+ private var debugLastStartElement = new mutable.Stack[(Int, String)]
+ private def debugLastPos = debugLastStartElement.top._1
+ private def debugLastElem = debugLastStartElement.top._2
+
+ private def errorBraces() = {
+ reportSyntaxError("in XML content, please use '}}' to express '}'")
+ throw ConfusedAboutBracesControl
+ }
+ def errorNoEnd(tag: String) = {
+ reportSyntaxError("expected closing tag of " + tag)
+ throw MissingEndTagControl
+ }
+
+ /** checks whether next character starts a Scala block, if yes, skip it.
+ * @return true if next character starts a scala block
+ */
+ def xCheckEmbeddedBlock: Boolean = {
+ // attentions, side-effect, used in xText
+ xEmbeddedBlock = (ch == '{') && { nextch; (ch != '{') }
+ xEmbeddedBlock
+ }
+
+ /** parse attribute and add it to listmap
+ * [41] Attributes ::= { S Name Eq AttValue }
+ * AttValue ::= `'` { _ } `'`
+ * | `"` { _ } `"`
+ * | `{` scalablock `}`
+ */
+ def xAttributes = {
+ val aMap = mutable.LinkedHashMap[String, Tree]()
+
+ while (isNameStart(ch)) {
+ val start = curOffset
+ val key = xName
+ xEQ
+ val delim = ch
+ val mid = curOffset
+ val value: Tree = ch match {
+ case '"' | '\'' =>
+ val tmp = xAttributeValue(ch_returning_nextch)
+
+ try handle.parseAttribute(Position(start, curOffset, mid), tmp)
+ catch {
+ case e: RuntimeException =>
+ errorAndResult("error parsing attribute value", parser.errorTermTree)
+ }
+
+ case '{' =>
+ nextch
+ xEmbeddedExpr
+ case SU =>
+ throw TruncatedXMLControl
+ case _ =>
+ errorAndResult("' or \" delimited attribute value or '{' scala-expr '}' expected", Literal(Constant("<syntax-error>")))
+ }
+ // well-formedness constraint: unique attribute names
+ if (aMap contains key)
+ reportSyntaxError("attribute %s may only be defined once" format key)
+
+ aMap(key) = value
+ if (ch != '/' && ch != '>')
+ xSpace
+ }
+ aMap
+ }
+
+ /** '<! CharData ::= [CDATA[ ( {char} - {char}"]]>"{char} ) ']]>'
+ *
+ * see [15]
+ */
+ def xCharData: Tree = {
+ val start = curOffset
+ xToken("[CDATA[")
+ val mid = curOffset
+ xTakeUntil(handle.charData, () => Position(start, curOffset, mid), "]]>")
+ }
+
+ def xUnparsed: Tree = {
+ val start = curOffset
+ xTakeUntil(handle.unparsed, () => Position(start, curOffset, start), "</xml:unparsed>")
+ }
+
+ /** Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+ *
+ * see [15]
+ */
+ def xComment: Tree = {
+ val start = curOffset - 2 // Rewinding to include "<!"
+ xToken("--")
+ xTakeUntil(handle.comment, () => Position(start, curOffset, start), "-->")
+ }
+
+ def appendText(pos: Position, ts: Buffer[Tree], txt: String): Unit = {
+ def append(t: String) = ts append handle.text(pos, t)
+
+ if (preserveWS) append(txt)
+ else {
+ val sb = new StringBuilder()
+
+ txt foreach { c =>
+ if (!isSpace(c)) sb append c
+ else if (sb.isEmpty || !isSpace(sb.last)) sb append ' '
+ }
+
+ val trimmed = sb.toString.trim
+ if (!trimmed.isEmpty) append(trimmed)
+ }
+ }
+
+ /** adds entity/character to ts as side-effect
+ * @precond ch == '&'
+ */
+ def content_AMP(ts: ArrayBuffer[Tree]): Unit = {
+ nextch
+ val toAppend = ch match {
+ case '#' => // CharacterRef
+ nextch
+ val theChar = handle.text(tmppos, xCharRef)
+ xToken(';')
+ theChar
+ case _ => // EntityRef
+ val n = xName
+ xToken(';')
+ handle.entityRef(tmppos, n)
+ }
+
+ ts append toAppend
+ }
+
+ /**
+ * @precond ch == '{'
+ * @postcond: xEmbeddedBlock == false!
+ */
+ def content_BRACE(p: Position, ts: ArrayBuffer[Tree]): Unit =
+ if (xCheckEmbeddedBlock) ts append xEmbeddedExpr
+ else appendText(p, ts, xText)
+
+ /** Returns true if it encounters an end tag (without consuming it),
+ * appends trees to ts as side-effect.
+ *
+ * @param ts ...
+ * @return ...
+ */
+ private def content_LT(ts: ArrayBuffer[Tree]): Boolean = {
+ if (ch == '/')
+ return true // end tag
+
+ val toAppend = ch match {
+ case '!' => nextch ; if (ch =='[') xCharData else xComment // CDATA or Comment
+ case '?' => nextch ; xProcInstr // PI
+ case _ => element // child node
+ }
+
+ ts append toAppend
+ false
+ }
+
+ def content: Buffer[Tree] = {
+ val ts = new ArrayBuffer[Tree]
+ while (true) {
+ if (xEmbeddedBlock)
+ ts append xEmbeddedExpr
+ else {
+ tmppos = Position(curOffset)
+ ch match {
+ // end tag, cdata, comment, pi or child node
+ case '<' => nextch ; if (content_LT(ts)) return ts
+ // either the character '{' or an embedded scala block }
+ case '{' => content_BRACE(tmppos, ts) // }
+ // EntityRef or CharRef
+ case '&' => content_AMP(ts)
+ case SU => return ts
+ // text content - here xEmbeddedBlock might be true
+ case _ => appendText(tmppos, ts, xText)
+ }
+ }
+ }
+ unreachable
+ }
+
+ /** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag
+ * | xmlTag1 '/' '>'
+ */
+ def element: Tree = {
+ val start = curOffset
+ val (qname, attrMap) = xTag(())
+ if (ch == '/') { // empty element
+ xToken("/>")
+ handle.element(Position(start, curOffset, start), qname, attrMap, true, new ListBuffer[Tree])
+ }
+ else { // handle content
+ xToken('>')
+ if (qname == "xml:unparsed")
+ return xUnparsed
+
+ debugLastStartElement.push((start, qname))
+ val ts = content
+ xEndTag(qname)
+ debugLastStartElement.pop
+ val pos = Position(start, curOffset, start)
+ qname match {
+ case "xml:group" => handle.group(pos, ts)
+ case _ => handle.element(pos, qname, attrMap, false, ts)
+ }
+ }
+ }
+
+ /** parse character data.
+ * precondition: xEmbeddedBlock == false (we are not in a scala block)
+ */
+ private def xText: String = {
+ assert(!xEmbeddedBlock, "internal error: encountered embedded block")
+ val buf = new StringBuilder
+ def done = buf.toString
+
+ while (ch != SU) {
+ if (ch == '}') {
+ if (charComingAfter(nextch) == '}') nextch
+ else errorBraces()
+ }
+
+ buf append ch
+ nextch
+ if (xCheckEmbeddedBlock || ch == '<' || ch == '&')
+ return done
+ }
+ done
+ }
+
+ /** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */
+ private def xLiteralCommon(f: () => Tree, ifTruncated: String => Unit): Tree = {
+ try return f()
+ catch {
+ case c @ TruncatedXMLControl =>
+ ifTruncated(c.getMessage)
+ case c @ (MissingEndTagControl | ConfusedAboutBracesControl) =>
+ parser.syntaxError(c.getMessage + debugLastElem + ">", debugLastPos)
+ case _: ArrayIndexOutOfBoundsException =>
+ parser.syntaxError("missing end tag in XML literal for <%s>" format debugLastElem, debugLastPos)
+ }
+ finally parser.in resume Tokens.XMLSTART
+
+ parser.errorTermTree
+ }
+
+ /** Use a lookahead parser to run speculative body, and return the first char afterward. */
+ private def charComingAfter(body: => Unit): Char = {
+ try {
+ input = input.lookaheadReader
+ body
+ ch
+ }
+ finally input = parser.in
+ }
+
+ /** xLiteral = element { element }
+ * @return Scala representation of this xml literal
+ */
+ def xLiteral: Tree = xLiteralCommon(
+ () => {
+ input = parser.in
+ handle.isPattern = false
+
+ val ts = new ArrayBuffer[Tree]
+ val start = curOffset
+ tmppos = Position(curOffset) // Iuli: added this line, as it seems content_LT uses tmppos when creating trees
+ content_LT(ts)
+
+ // parse more XML ?
+ if (charComingAfter(xSpaceOpt) == '<') {
+ xSpaceOpt
+ while (ch == '<') {
+ nextch
+ ts append element
+ xSpaceOpt
+ }
+ handle.makeXMLseq(Position(start, curOffset, start), ts)
+ }
+ else {
+ assert(ts.length == 1)
+ ts(0)
+ }
+ },
+ msg => parser.incompleteInputError(msg)
+ )
+
+ /** @see xmlPattern. resynchronizes after successful parse
+ * @return this xml pattern
+ */
+ def xLiteralPattern: Tree = xLiteralCommon(
+ () => {
+ input = parser.in
+ saving[Boolean, Tree](handle.isPattern, handle.isPattern = _) {
+ handle.isPattern = true
+ val tree = xPattern
+ xSpaceOpt
+ tree
+ }
+ },
+ msg => parser.syntaxError(msg, curOffset)
+ )
+
+ def escapeToScala[A](op: => A, kind: String) = {
+ xEmbeddedBlock = false
+ val res = saving[List[Int], A](parser.in.sepRegions, parser.in.sepRegions = _) {
+ parser.in resume LBRACE
+ op
+ }
+ if (parser.in.token != RBRACE)
+ reportSyntaxError(" expected end of Scala " + kind)
+
+ res
+ }
+
+ def xEmbeddedExpr: Tree = escapeToScala(parser.block(), "block")
+
+ /** xScalaPatterns ::= patterns
+ */
+ def xScalaPatterns: List[Tree] = escapeToScala(parser.patterns(), "pattern")
+
+ def reportSyntaxError(pos: Int, str: String) = parser.syntaxError(str, pos)
+ def reportSyntaxError(str: String): Unit = {
+ reportSyntaxError(curOffset, "in XML literal: " + str)
+ nextch()
+ }
+
+ /** '<' xPattern ::= Name [S] { xmlPattern | '{' pattern3 '}' } ETag
+ * | Name [S] '/' '>'
+ */
+ def xPattern: Tree = {
+ var start = curOffset
+ val qname = xName
+ debugLastStartElement.push((start, qname))
+ xSpaceOpt
+
+ val ts = new ArrayBuffer[Tree]
+
+ val isEmptyTag = ch == '/'
+ if (isEmptyTag) nextch()
+ xToken('>')
+
+ if (!isEmptyTag) {
+ // recurses until it hits a termination condition, then returns
+ def doPattern: Boolean = {
+ val start1 = curOffset
+ if (xEmbeddedBlock) ts ++= xScalaPatterns
+ else ch match {
+ case '<' => // tag
+ nextch
+ if (ch != '/') ts append xPattern // child
+ else return false // terminate
+
+ case '{' => // embedded Scala patterns
+ while (ch == '{') {
+ nextch
+ ts ++= xScalaPatterns
+ }
+ assert(!xEmbeddedBlock, "problem with embedded block")
+
+ case SU =>
+ throw TruncatedXMLControl
+
+ case _ => // text
+ appendText(Position(start1, curOffset, start1), ts, xText)
+ // here xEmbeddedBlock might be true:
+ // if (xEmbeddedBlock) throw new ApplicationError("after:" + text); // assert
+ }
+ true
+ }
+
+ while (doPattern) { } // call until false
+ xEndTag(qname)
+ debugLastStartElement.pop
+ }
+
+ handle.makeXMLpat(Position(start, curOffset, start), qname, ts)
+ }
+ } /* class MarkupParser */
+}