Remove dependency on xml in ast.parser
Copied the following files from scala.xml to ast.parser.xml: MarkupParsers, which differs from scala.xml.MarkupParsers only in the first 17 lines; and Utility.scala, which was refactored and reduced (it also includes TokenTests).
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+/** This is not a public trait - it contains common code shared
+ * between the library level XML parser and the compiler's.
+ * All members should be accessed through those.
+ */
+private[scala] trait MarkupParserCommon {
+ import Utility._
+ import scala.reflect.internal.Chars.SU
+ // Always throws (via scala.sys.error); used to give a result type to code paths that cannot complete normally.
+ protected def unreachable = scala.sys.error("Cannot be reached.")
+ // type HandleType // MarkupHandler, SymbolicXMLBuilder
+ // Abstract type members; the trailing comments name the types they are expected to be bound to.
+ type InputType // Source, CharArrayReader
+ type PositionType // Int, Position
+ type ElementType // NodeSeq, Tree
+ type NamespaceType // NamespaceBinding, Any
+ type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+ // Builds the attributes of the tag called `name`; xTag calls this right after reading the tag name.
+ def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+ // Builds a processing-instruction element; xProcInstr passes the position, the target name and the text read up to "?>".
+ def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+ /** Parses the name of a start tag or empty-element tag and builds its attributes.
+  *
+  *  {{{
+  *  [40] STag         ::= '<' Name { S Attribute } [S]
+  *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+  *  }}}
+  *
+  *  @param pscope the namespace scope forwarded to `mkAttributes`
+  *  @return the tag name paired with the attributes produced by `mkAttributes`
+  */
+ protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+   val tagName = xName
+   // Optional whitespace may separate the name from the first attribute.
+   xSpaceOpt()
+   val attributes = mkAttributes(tagName, pscope)
+   (tagName, attributes)
+ }
+ /** Parses a processing instruction whose opening `<?` has already been consumed.
+  *
+  *  {{{
+  *  '<?' ProcInstr ::= Name [S ({Char} - ({Char}'>?' {Char})]'?>'
+  *  }}}
+  *
+  *  see [15]
+  *
+  *  @return the element built by `mkProcInstr` from the target name and the
+  *          text found before the closing `?>`
+  */
+ def xProcInstr: ElementType = {
+   val target = xName
+   xSpaceOpt()
+   xTakeUntil(
+     (pos: PositionType, text: String) => mkProcInstr(pos, target, text),
+     () => tmppos,
+     "?>")
+ }
+ /** Reads an attribute value up to (and including) the closing quote.
+  *
+  *  The value may not contain `<`; if it does, a syntax error is reported
+  *  and the empty string is returned.
+  *
+  *  @param endCh the closing quote character, either `'` or `"`
+  *  @return the raw characters found before `endCh` (not yet normalized)
+  */
+ def xAttributeValue(endCh: Char): String = {
+   val value = new StringBuilder
+   while (ch != endCh) {
+     // well-formedness constraint
+     if (ch == '<')
+       return errorAndResult("'<' not allowed in attrib value", "")
+     // truncatedError never returns, so falling through means the char is usable
+     if (ch == SU)
+       truncatedError("")
+     value append ch_returning_nextch
+   }
+   // step past the closing quote
+   ch_returning_nextch
+   // @todo: normalize attribute value
+   value.toString
+ }
+ /** Reads a quoted attribute value, using the current character as the quote,
+  *  and returns the value after attribute-value normalization.
+  */
+ def xAttributeValue(): String = {
+   // The current character is the opening quote; it also terminates the value.
+   val quote = ch_returning_nextch
+   val raw = xAttributeValue(quote)
+   // well-formedness constraint
+   normalizeAttributeValue(raw)
+ }
+ /** Consumes characters from `it` up to and including the first occurrence of `end`.
+  *
+  *  @param it  the character source; it is advanced past the terminator
+  *  @param end the terminating character
+  *  @return the characters read before `end` (the terminator is not included)
+  *  @throws RuntimeException (via `scala.sys.error`) if `it` runs out before `end` is seen
+  */
+ private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+   val buf = new StringBuilder
+   // The scrutinee must be the next character of the iterator.
+   while (it.hasNext) it.next() match {
+     case `end` => return buf.toString
+     case ch    => buf append ch
+   }
+   scala.sys.error("Expected '%s'".format(end))
+ }
+ /** Parses an end tag whose leading `<` has already been consumed.
+  *
+  *  {{{
+  *  [42] '<' xmlEndTag ::= '<' '/' Name S? '>'
+  *  }}}
+  *
+  *  @param startName the name of the start tag being closed; a different
+  *                   name in the end tag is reported through `errorNoEnd`
+  */
+ def xEndTag(startName: String): Unit = {
+   xToken('/')
+   if (xName != startName)
+     errorNoEnd(startName)
+   xSpaceOpt()
+   xToken('>')
+ }
+ /** Parses an XML name.
+  *
+  *  {{{
+  *  Name ::= (Letter | '_') (NameChar)*
+  *  }}}
+  *
+  *  Strictly, `Name ::= (Letter | '_' | ':') (NameChar)*`, but a leading `:`
+  *  cannot happen here. See [5] of the XML 1.0 specification.
+  *
+  *  pre-condition: ch != ':' // assured by definition of XMLSTART token
+  *  post-condition: name does neither start, nor end in ':'
+  *
+  *  @return the parsed name; the empty string if the current character cannot
+  *          start a name (a syntax error is reported in that case)
+  */
+ def xName: String = {
+   // truncatedError never returns, so no else-branch is needed
+   if (ch == SU)
+     truncatedError("")
+   if (!isNameStart(ch))
+     return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+   val collected = new StringBuilder
+   // The first character is known to be a name start; take it unconditionally.
+   collected append ch_returning_nextch
+   while (isNameChar(ch))
+     collected append ch_returning_nextch
+   val name = collected.toString
+   if (name endsWith ":") {
+     reportSyntaxError( "name cannot end in ':'" )
+     name.substring(0, name.length - 1)
+   }
+   else name
+ }
+ /** Maps the name of an entity found in an attribute value to its replacement text.
+  *  Names that are not recognised are returned in their original `&name;` form.
+  */
+ private def attr_unescape(s: String) = s match {
+   case "amp"            => "&"
+   case "lt"             => "<"
+   case "gt"             => ">"
+   case "apos"           => "'"
+   // both spellings are accepted for the double quote
+   case "quot" | "quote" => "\""
+   case unknown          => "&" + unknown + ";"
+ }
+ /** Normalizes a raw attribute value (see section 3.3.3 of the XML specification).
+  *
+  *  Each space, tab, newline or carriage return is replaced by a single space,
+  *  character references (`&#...;`) are decoded through `xCharRef`, and entity
+  *  references are resolved by `attr_unescape`, which leaves unknown entities
+  *  in their `&name;` form.
+  *
+  *  @param attval the attribute value as read between the quotes
+  *  @return the normalized value
+  */
+ private def normalizeAttributeValue(attval: String): String = {
+   val buf = new StringBuilder
+   // Buffered so the character following '&' can be inspected without consuming it.
+   val it = attval.iterator.buffered
+   while (it.hasNext) buf append (it.next() match {
+     case ' ' | '\t' | '\n' | '\r' => " "
+     // skip the '#' before handing the iterator to the char-ref parser
+     case '&' if it.head == '#'    => it.next(); xCharRef(it)
+     case '&'                      => attr_unescape(takeUntilChar(it, ';'))
+     case c                        => c
+   })
+   buf.toString
+ }
+ /** Parses a character reference using the supplied character accessors.
+  *
+  *  {{{
+  *  CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+  *            | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+  *  }}}
+  *
+  *  see [66]
+  *
+  *  @param ch     yields the current character
+  *  @param nextch advances to the next character
+  *  @return the referenced character as a string
+  */
+ def xCharRef(ch: () => Char, nextch: () => Unit): String =
+   Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
+ /** Parses a character reference from `it`, which must be positioned on the
+  *  first character after `&#`.
+  */
+ def xCharRef(it: Iterator[Char]): String = {
+   // `c` plays the role of the parser's current character for this iterator.
+   var c = it.next()
+   Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _)
+ }
+ /** Parses a character reference from this parser's own input. */
+ def xCharRef: String = xCharRef(() => ch, () => nextch())
+ /** Create a lookahead reader which does not influence the input */
+ def lookahead(): BufferedIterator[Char]
+ /** The library and compiler parsers had the interesting distinction of
+ * different behavior for nextch (a function for which there are a total
+ * of two plausible behaviors, so we know the design space was fully
+ * explored.) One of them returned the value of nextch before the increment
+ * and one of them the new value. So to unify code we have to at least
+ * temporarily abstract over the nextchs.
+ */
+ // The current input character; the parsing methods compare it against SU to detect truncated input.
+ def ch: Char
+ // Advances the input by one character.
+ def nextch(): Unit
+ // Presumably returns the current character and then advances (it is used to append the current char to buffers) -- confirm against implementations.
+ protected def ch_returning_nextch: Char
+ // End-of-input flag; xSpaceOpt stops skipping whitespace once it is set.
+ def eof: Boolean
+ // def handle: HandleType
+ // Position value handed to xTakeUntil's handler by xProcInstr.
+ var tmppos: PositionType
+ // Called by xToken when the current character is not the expected one, and by xSpace when whitespace is missing.
+ def xHandleError(that: Char, msg: String): Unit
+ def reportSyntaxError(str: String): Unit
+ def reportSyntaxError(pos: Int, str: String): Unit
+ // Never returns normally (result type Nothing); invoked when SU is met in the middle of a construct.
+ def truncatedError(msg: String): Nothing
+ // Never returns normally (result type Nothing); invoked by xEndTag when the end tag name differs from the start tag name.
+ def errorNoEnd(tag: String): Nothing
+ /** Reports `msg` as a syntax error and then yields `x` as the fallback result. */
+ protected def errorAndResult[T](msg: String, x: T): T =
+   returning(x)(_ => reportSyntaxError(msg))
+ /** Consumes the current character if it equals `that`; otherwise reports
+  *  the mismatch through `xHandleError` without consuming anything here.
+  */
+ def xToken(that: Char): Unit = {
+   if (ch == that) nextch()
+   else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+ }
+ /** Consumes each character of `that` in order, as by `xToken(Char)`. */
+ def xToken(that: Seq[Char]): Unit = that foreach xToken
+ /** Scans `[S] '=' [S]`: an equals sign with optional whitespace on either side. */
+ def xEQ(): Unit = {
+   xSpaceOpt()
+   xToken('=')
+   xSpaceOpt()
+ }
+ /** Skips optional whitespace (`S?`), stopping at end of input. */
+ def xSpaceOpt(): Unit =
+   while (isSpace(ch) && !eof)
+     nextch()
+ /** Scans mandatory whitespace: `[3] S ::= (#x20 | #x9 | #xD | #xA)+`.
+  *  Reports "whitespace expected" if the current character is not whitespace.
+  */
+ def xSpace(): Unit =
+   if (!isSpace(ch)) xHandleError(ch, "whitespace expected")
+   else {
+     nextch()
+     xSpaceOpt()
+   }
+ /** Runs the side-effecting function `f` on `x` and then hands `x` back.
+  *
+  *  @param x the value to pass to `f` and to return
+  *  @param f the effect to perform with `x`
+  *  @return `x` itself
+  */
+ def returning[T](x: T)(f: T => Unit): T = {
+   f(x)
+   x
+ }
+ /** Evaluates `body` and afterwards restores a variable to the value it had on entry.
+  *
+  *  @param getter the variable's value at call time (captured before `body` runs)
+  *  @param setter writes a value back to the variable; it is invoked with the
+  *                captured value even if `body` throws
+  *  @param body   the computation to run
+  *  @return the result of `body`
+  */
+ def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+   val snapshot: A = getter
+   try {
+     body
+   } finally {
+     setter(snapshot)
+   }
+ }
+ /** Accumulates input characters until the string `until` is found, then
+  *  passes the accumulated text and a position to `handler`.
+  *
+  *  The terminator itself is consumed from the input (by `peek`) and is not
+  *  part of the accumulated text. Meeting SU before the terminator triggers
+  *  `truncatedError`.
+  *
+  *  @param handler    builds the result from the position and the collected text
+  *  @param positioner supplies the position, evaluated once the terminator is found
+  *  @param until      the terminating string; must be non-empty
+  *  @return whatever `handler` returns
+  */
+ protected def xTakeUntil[T](
+   handler: (PositionType, String) => T,
+   positioner: () => PositionType,
+   until: String): T =
+ {
+   val collected = new StringBuilder
+   val first = until.head
+   val remainder = until.tail
+   @scala.annotation.tailrec
+   def loop(): T =
+     if (ch == first && peek(remainder))
+       handler(positioner(), collected.toString)
+     else {
+       if (ch == SU)
+         truncatedError("") // throws TruncatedXMLControl in compiler
+       collected append ch
+       nextch()
+       loop()
+     }
+   loop()
+ }
+ /** Checks, through a non-destructive lookahead reader, whether the upcoming
+  *  input matches `lookingFor`.
+  *
+  *  On a match the characters are dropped from the real input and `true` is
+  *  returned; otherwise the input is left unchanged and `false` is returned.
+  */
+ private def peek(lookingFor: String): Boolean = {
+   val expectedLength = lookingFor.length
+   val matched = lookahead().take(expectedLength).sameElements(lookingFor.iterator)
+   if (matched) {
+     // drop the chars from the real reader (all lookahead + orig):
+     // one step for the current character plus one per lookahead character
+     var remaining = expectedLength + 1
+     while (remaining > 0) {
+       nextch()
+       remaining -= 1
+     }
+   }
+   matched
+ }
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+import scala.collection.mutable
+/**
+ * The `Utility` object provides utility functions for processing instances
+ * of bound and not bound XML classes, as well as escaping text nodes.
+ *
+ * @author Burak Emir
+ */
+object Utility {
+ import scala.reflect.internal.Chars.SU
+ /** The predefined entity names and the character each one stands for. */
+ private val unescMap = Map(
+   ("lt", '<'),
+   ("gt", '>'),
+   ("amp", '&'),
+   ("quot", '"'),
+   ("apos", '\'')
+ )
+ /**
+  * Appends the character denoted by the predefined entity name `ref` to `s`:
+  * `amp` appends `&`, `lt` appends `<`, and so on.
+  *
+  * @param ref the entity name, without the surrounding `&` and `;`
+  * @param s   the buffer to append to
+  * @return `s` after appending, or `'''null'''` if `ref` was not a predefined entity
+  *         (in which case `s` is left untouched)
+  */
+ private final def unescape(ref: String, s: StringBuilder): StringBuilder =
+   unescMap.get(ref) match {
+     case Some(replacement) => s append replacement
+     case None              => null
+   }
+ /** Splits an attribute value into text runs and entity references.
+  *
+  *  Character references (`&#...;`) and the predefined entities are decoded
+  *  in place and become part of the surrounding text run. Any other entity
+  *  reference ends the current text run and is emitted as its own element.
+  *
+  *  @param value     the attribute value to parse
+  *  @param text      builds a result element from a run of text
+  *  @param entityRef builds a result element from the name of a non-predefined entity
+  *  @return the elements in source order
+  *  @throws RuntimeException if a character reference is malformed
+  */
+ def parseAttributeValue[T](value: String, text: String => T, entityRef: String => T): List[T] = {
+   val sb = new StringBuilder
+   // scratch buffer for the entity name currently being read
+   val rfb = new StringBuilder
+   val nb = new mutable.ListBuffer[T]()
+   val it = value.iterator
+   while (it.hasNext) {
+     var c = it.next()
+     // entity! flush buffer into text node
+     if (c == '&') {
+       c = it.next()
+       if (c == '#') {
+         c = it.next()
+         val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)})
+         sb.append(theChar)
+       }
+       else {
+         rfb append c
+         c = it.next()
+         while (c != ';') {
+           rfb.append(c)
+           c = it.next()
+         }
+         val ref = rfb.toString()
+         rfb.clear()
+         // unescape appends to sb and returns it for predefined entities, null otherwise
+         unescape(ref,sb) match {
+           case null =>
+             if (!sb.isEmpty) { // flush buffer
+               nb += text(sb.toString())
+               sb.clear()
+             }
+             nb += entityRef(ref) // add entityref
+           case _ =>
+         }
+       }
+     }
+     else sb append c
+   }
+   if(!sb.isEmpty) // flush buffer
+     nb += text(sb.toString())
+   nb.toList
+ }
+ /**
+ * Parses the digits of a character reference and returns the character they denote.
+ *
+ * {{{
+ * CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
+ * | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+ * }}}
+ * See [66]
+ *
+ * Reading starts at the character currently returned by `ch` (presumably the one
+ * right after "&amp;#" -- confirm against callers) and stops when `ch` returns `;`,
+ * which is not consumed here.
+ *
+ * @param ch returns the current character
+ * @param nextch advances to the next character
+ * @param reportSyntaxError called for characters that are not valid in the reference
+ * @param reportTruncatedError called when SU is met before the closing `;`
+ * @return a string holding the single code point that was accumulated
+ */
+ def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = {
+ // A leading 'x' selects hexadecimal and is consumed; otherwise nothing is consumed here.
+ val hex = (ch() == 'x') && { nextch(); true }
+ val base = if (hex) 16 else 10
+ // The code point accumulated so far.
+ var i = 0
+ while (ch() != ';') {
+ ch() match {
+ case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+ i = i * base + ch().asDigit
+ case 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
+ | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
+ if (! hex)
+ reportSyntaxError("hex char not allowed in decimal char ref\n" +
+ "Did you mean to write &#x ?")
+ else
+ i = i * base + ch().asDigit
+ case SU =>
+ reportTruncatedError("")
+ case _ =>
+ reportSyntaxError("character '" + ch() + "' not allowed in char ref\n")
+ }
+ // Runs after every branch, so parsing continues past a reported character if the callback returns.
+ nextch()
+ }
+ // Build the string from the code point.
+ new String(Array(i), 0, 1)
+ }
+ /** Tells whether `ch` is an XML whitespace character.
+  *  {{{
+  *  (#x20 | #x9 | #xD | #xA)
+  *  }}} */
+ final def isSpace(ch: Char): Boolean =
+   ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
+ /** Tells whether `cs` is a non-empty sequence made up only of XML whitespace.
+  *  {{{
+  *  (#x20 | #x9 | #xD | #xA)+
+  *  }}} */
+ final def isSpace(cs: Seq[Char]): Boolean =
+   if (cs.isEmpty) false
+   else !cs.exists(c => !isSpace(c))
+ /** Tells whether `ch` may appear inside an XML name.
+  *  {{{
+  *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+  *             | CombiningChar | Extender
+  *  }}}
+  *  See [4] and Appendix B of XML 1.0 specification.
+  */
+ def isNameChar(ch: Char): Boolean = {
+   import java.lang.Character._
+   // The constants represent groups Mc, Me, Mn, Lm, and Nd.
+   isNameStart(ch) || (getType(ch).toByte match {
+     case COMBINING_SPACING_MARK |
+          ENCLOSING_MARK | NON_SPACING_MARK |
+          MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
+     case _                                      => ".-:" contains ch
+   })
+ }
+ /** Tells whether `ch` may start an XML name.
+  *  {{{
+  *  NameStart ::= ( Letter | '_' )
+  *  }}}
+  *  where Letter means in one of the Unicode general
+  *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
+  *
+  *  We do not allow a name to start with `:`.
+  *  See [3] and Appendix B of XML 1.0 specification
+  */
+ def isNameStart(ch: Char): Boolean = {
+   import java.lang.Character._
+   // getType returns an Int while the category constants are Bytes, hence toByte.
+   getType(ch).toByte match {
+     case LOWERCASE_LETTER |
+          UPPERCASE_LETTER | OTHER_LETTER |
+          TITLECASE_LETTER | LETTER_NUMBER => true
+     case _                                => ch == '_'
+   }
+ }
+ /** Tells whether `s` is a well-formed XML name.
+  *  {{{
+  *  Name ::= ( Letter | '_' ) (NameChar)*
+  *  }}}
+  *  See [5] of XML 1.0 specification.
+  */
+ def isName(s: String): Boolean =
+   s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar)