diff options
5 files changed, 482 insertions, 14 deletions
diff --git a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
index 70b9bf3168..d3f495f280 100755
--- a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
@@ -10,7 +10,7 @@ import scala.collection.mutable
 import mutable.{ Buffer, ArrayBuffer, ListBuffer }
 import scala.util.control.ControlThrowable
 import scala.tools.nsc.util.CharArrayReader
-import scala.xml.parsing.MarkupParserCommon
+import scala.tools.nsc.ast.parser.xml.{MarkupParserCommon, Utility}
 import scala.reflect.internal.Chars.{ SU, LF }
 
 // XXX/Note: many/most of the functions in here are almost direct cut and pastes
@@ -41,7 +41,7 @@ trait MarkupParsers {
   import global._
 
   class MarkupParser(parser: SourceFileParser, final val preserveWS: Boolean) extends MarkupParserCommon {
-
+    import Utility.{ isNameStart, isSpace }
     import Tokens.{ LBRACE, RBRACE }
 
     type PositionType = Position
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index 82a3144304..a8162a01bf 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -12,7 +12,7 @@ import Tokens._
 import scala.annotation.{ switch, tailrec }
 import scala.collection.{ mutable, immutable }
 import mutable.{ ListBuffer, ArrayBuffer }
-import scala.xml.Utility.{ isNameStart }
+import scala.tools.nsc.ast.parser.xml.Utility.isNameStart
 import scala.language.postfixOps
 
 /** See Parsers.scala / ParsersCommon for some explanation of ScannersCommon.
diff --git a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
index f326212d5b..1abc0c860c 100755
--- a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
@@ -7,8 +7,6 @@ package scala.tools.nsc
 package ast.parser
 
 import scala.collection.{ mutable, immutable }
-import scala.xml.{ EntityRef, Text }
-import scala.xml.XML.{ xmlns }
 import symtab.Flags.MUTABLE
 import scala.reflect.internal.util.StringOps.splitWhere
 
@@ -143,14 +141,12 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) {
     (buf map convertToTextPat).toList
 
   def parseAttribute(pos: Position, s: String): Tree = {
-    val ts = scala.xml.Utility.parseAttributeValue(s) map {
-      case Text(s)      => text(pos, s)
-      case EntityRef(s) => entityRef(pos, s)
-    }
-    ts.length match {
-      case 0 => gen.mkNil
-      case 1 => ts.head
-      case _ => makeXMLseq(pos, ts.toList)
+    import xml.Utility.parseAttributeValue
+
+    parseAttributeValue(s, text(pos, _), entityRef(pos, _)) match {
+      case Nil      => gen.mkNil
+      case t :: Nil => t
+      case ts       => makeXMLseq(pos, ts.toList)
     }
   }
 
@@ -198,7 +194,7 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) {
 
     /* Extract all the namespaces from the attribute map. */
     val namespaces: List[Tree] =
-      for (z <- attrMap.keys.toList ; if z startsWith xmlns) yield {
+      for (z <- attrMap.keys.toList ; if z startsWith "xmlns") yield {
         val ns = splitPrefix(z) match {
           case (Some(_), rest) => rest
           case _ => null
diff --git a/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala b/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala
new file mode 100644
index 0000000000..f6cfb64ed8
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala
@@ -0,0 +1,276 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala.tools.nsc.ast.parser.xml
+
+/** This is not a public trait - it contains common code shared
+ *  between the library level XML parser and the compiler's.
+ *  All members should be accessed through those.
+ */
+private[scala] trait MarkupParserCommon {
+  import Utility._
+  import scala.reflect.internal.Chars.SU
+
+  /** Marks a point control flow should never reach; always throws. */
+  protected def unreachable = scala.sys.error("Cannot be reached.")
+
+  // type HandleType       // MarkupHandler, SymbolicXMLBuilder
+  type InputType        // Source, CharArrayReader
+  type PositionType     // Int, Position
+  type ElementType      // NodeSeq, Tree
+  type NamespaceType    // NamespaceBinding, Any
+  type AttributesType   // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+  def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+  def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+  /** parse a start or empty tag.
+   *  [40] STag         ::= '<' Name { S Attribute } [S]
+   *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+   */
+  protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+    val name = xName
+    xSpaceOpt()
+
+    (name, mkAttributes(name, pscope))
+  }
+
+  /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'?>' {Char})]'?>'
+   *
+   * see [15]
+   */
+  def xProcInstr: ElementType = {
+    val n = xName
+    xSpaceOpt()
+    xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>")
+  }
+
+  /** attribute value, terminated by either `'` or `"`. value may not contain `<`.
+   *  Returns the text read before `endCh`; `endCh` itself is then skipped.
+   *
+   *  @param endCh either `'` or `"`
+   */
+  def xAttributeValue(endCh: Char): String = {
+    val buf = new StringBuilder
+    while (ch != endCh) {
+      // well-formedness constraint
+      if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
+      else if (ch == SU) truncatedError("")
+      else buf append ch_returning_nextch
+    }
+    ch_returning_nextch
+    // @todo: normalize attribute value
+    buf.toString
+  }
+
+  /** Takes the character produced by `ch_returning_nextch` as the delimiter,
+   *  reads the attribute value up to it and returns the normalized value.
+   */
+  def xAttributeValue(): String = {
+    val str = xAttributeValue(ch_returning_nextch)
+    // well-formedness constraint
+    normalizeAttributeValue(str)
+  }
+
+  /** Consumes `it` up to and including `end`, returning the characters before it.
+   *  Fails with a runtime error if `end` never occurs.
+   */
+  private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+    val buf = new StringBuilder
+    while (it.hasNext) it.next() match {
+      case `end` => return buf.toString
+      case ch    => buf append ch
+    }
+    scala.sys.error("Expected '%s'".format(end))
+  }
+
+  /** [42]  '<' xmlEndTag ::=  '<' '/' Name S? '>'
+   */
+  def xEndTag(startName: String): Unit = {
+    xToken('/')
+    if (xName != startName)
+      errorNoEnd(startName)
+
+    xSpaceOpt()
+    xToken('>')
+  }
+
+  /** actually, Name ::= (Letter | '_' | ':') (NameChar)*  but starting with ':' cannot happen
+   *  Name ::= (Letter | '_') (NameChar)*
+   *
+   *  see  [5] of XML 1.0 specification
+   *
+   *  pre-condition:  ch != ':' // assured by definition of XMLSTART token
+   *  post-condition: name neither starts nor ends in ':'
+   */
+  def xName: String = {
+    if (ch == SU)
+      truncatedError("")
+    else if (!isNameStart(ch))
+      return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+
+    val buf = new StringBuilder
+
+    do buf append ch_returning_nextch
+    while (isNameChar(ch))
+
+    if (buf.last == ':') {
+      reportSyntaxError( "name cannot end in ':'" )
+      buf.toString dropRight 1
+    }
+    else buf.toString
+  }
+
+  /** Expands a predefined entity name to its replacement text; any other
+   *  name is turned back into the reference `&name;`.
+   */
+  private def attr_unescape(s: String) = s match {
+    case "lt"     => "<"
+    case "gt"     => ">"
+    case "amp"    => "&"
+    case "apos"   => "'"
+    case "quot"   => "\""
+    case "quote"  => "\""
+    case _        => "&" + s + ";"
+  }
+
+  /** Normalizes an attribute value: each white space character becomes a
+   *  space, character references are decoded and the predefined entities
+   *  are expanded (other entity references are left as written).
+   *  see spec 3.3.3
+   */
+  private def normalizeAttributeValue(attval: String): String = {
+    val buf = new StringBuilder
+    val it = attval.iterator.buffered
+
+    while (it.hasNext) buf append (it.next() match {
+      case ' ' | '\t' | '\n' | '\r' => " "
+      // `it.head` throws on an exhausted iterator, so check `hasNext` first;
+      // a trailing '&' then fails below with "Expected ';'" like any other
+      // unterminated reference.
+      case '&' if it.hasNext && it.head == '#' => it.next() ; xCharRef(it)
+      case '&' => attr_unescape(takeUntilChar(it, ';'))
+      case c   => c
+    })
+
+    buf.toString
+  }
+
+  /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+   *            | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+   *
+   * see [66]
+   */
+  def xCharRef(ch: () => Char, nextch: () => Unit): String =
+    Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
+
+  /** Parses a character reference from `it`, positioned just after "&#". */
+  def xCharRef(it: Iterator[Char]): String = {
+    var c = it.next()
+    Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _)
+  }
+
+  /** Parses a character reference from the parser's own input. */
+  def xCharRef: String = xCharRef(() => ch, () => nextch())
+
+  /** Create a lookahead reader which does not influence the input */
+  def lookahead(): BufferedIterator[Char]
+
+  /** The library and compiler parsers had the interesting distinction of
+   *  different behavior for nextch (a function for which there are a total
+   *  of two plausible behaviors, so we know the design space was fully
+   *  explored.) One of them returned the value of nextch before the increment
+   *  and one of them the new value.  So to unify code we have to at least
+   *  temporarily abstract over the nextchs.
+   */
+  def ch: Char
+  def nextch(): Unit
+  protected def ch_returning_nextch: Char
+  def eof: Boolean
+
+  // def handle: HandleType
+  var tmppos: PositionType
+
+  def xHandleError(that: Char, msg: String): Unit
+  def reportSyntaxError(str: String): Unit
+  def reportSyntaxError(pos: Int, str: String): Unit
+
+  def truncatedError(msg: String): Nothing
+  def errorNoEnd(tag: String): Nothing
+
+  /** Reports `msg` as a syntax error and returns the fallback value `x`. */
+  protected def errorAndResult[T](msg: String, x: T): T = {
+    reportSyntaxError(msg)
+    x
+  }
+
+  /** Consumes `that` if it is the current character, otherwise calls `xHandleError`. */
+  def xToken(that: Char): Unit = {
+    if (ch == that) nextch()
+    else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+  }
+  def xToken(that: Seq[Char]): Unit = { that foreach xToken }
+
+  /** scan [S] '=' [S]*/
+  def xEQ(): Unit = { xSpaceOpt(); xToken('='); xSpaceOpt() }
+
+  /** skip optional space S? */
+  def xSpaceOpt(): Unit = while (isSpace(ch) && !eof) nextch()
+
+  /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
+  def xSpace(): Unit =
+    if (isSpace(ch)) { nextch(); xSpaceOpt() }
+    else xHandleError(ch, "whitespace expected")
+
+  /** Apply a function and return the passed value */
+  def returning[T](x: T)(f: T => Unit): T = { f(x); x }
+
+  /** Execute body with a variable saved and restored after execution */
+  def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+    val saved = getter
+    try body
+    finally setter(saved)
+  }
+
+  /** Take characters from input stream until given String "until"
+   *  is seen.  Once seen, the accumulated characters are passed
+   *  along with the current Position to the supplied handler function.
+   */
+  protected def xTakeUntil[T](
+    handler: (PositionType, String) => T,
+    positioner: () => PositionType,
+    until: String): T =
+  {
+    val sb = new StringBuilder
+    val head = until.head
+    val rest = until.tail
+
+    while (true) {
+      if (ch == head && peek(rest))
+        return handler(positioner(), sb.toString)
+      else if (ch == SU)
+        truncatedError("")  // throws TruncatedXMLControl in compiler
+
+      sb append ch
+      nextch()
+    }
+    unreachable
+  }
+
+  /** Create a non-destructive lookahead reader and see if the head
+   *  of the input would match the given String.  If yes, return true
+   *  and drop the entire String from input; if no, return false
+   *  and leave input unchanged.
+   */
+  private def peek(lookingFor: String): Boolean =
+    (lookahead() take lookingFor.length sameElements lookingFor.iterator) && {
+      // drop the chars from the real reader (all lookahead + orig)
+      (0 to lookingFor.length) foreach (_ => nextch())
+      true
+    }
+}
diff --git a/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala b/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala
new file mode 100755
index 0000000000..39e4831af2
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala
@@ -0,0 +1,196 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala.tools.nsc.ast.parser.xml
+
+import scala.collection.mutable
+
+
+/**
+ * The `Utility` object provides utility functions for processing instances
+ * of bound and not bound XML classes, as well as escaping text nodes.
+ *
+ * @author Burak Emir
+ */
+object Utility {
+  import scala.reflect.internal.Chars.SU
+
+  /** The five predefined XML entities and their replacement characters. */
+  private val unescMap = Map(
+    "lt"    -> '<',
+    "gt"    -> '>',
+    "amp"   -> '&',
+    "quot"  -> '"',
+    "apos"  -> '\''
+  )
+
+  /**
+   * Splits an attribute value into text and entity-reference pieces.
+   *
+   * Character references (`&#...;`) and the five predefined entities are
+   * decoded into the surrounding text; any other `&name;` ends the current
+   * text run and is passed to `entityRef`.
+   *
+   * @param value     the attribute value, without the enclosing quotes
+   * @param text      builds a result element from a run of decoded text
+   * @param entityRef builds a result element from the name of an entity
+   *                  that is not predefined
+   * @return the pieces in document order; empty if `value` is empty
+   * @throws RuntimeException if a character reference is malformed, or
+   *         (as its subclass NoSuchElementException) if `value` ends before
+   *         the `;` that closes a reference
+   */
+  def parseAttributeValue[T](value: String, text: String => T, entityRef: String => T): List[T] = {
+    val sb  = new StringBuilder // pending decoded text
+    val rfb = new StringBuilder // name of the entity reference being read
+    val nb  = new mutable.ListBuffer[T]()
+
+    val it = value.iterator
+    // `it.next()` on an exhausted iterator fails with a message that says
+    // nothing about the input; name the unterminated reference instead. The
+    // exception type is kept so existing handlers still match.
+    def next(): Char =
+      if (it.hasNext) it.next()
+      else throw new NoSuchElementException("unterminated reference in attribute value: " + value)
+
+    while (it.hasNext) {
+      var c = it.next()
+      // entity! flush buffer into text node
+      if (c == '&') {
+        c = next()
+        if (c == '#') {
+          c = next()
+          val theChar = parseCharRef ({ ()=> c },{ () => c = next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)})
+          sb.append(theChar)
+        }
+        else {
+          rfb append c
+          c = next()
+          while (c != ';') {
+            rfb.append(c)
+            c = next()
+          }
+          val ref = rfb.toString()
+          rfb.clear()
+          unescMap get ref match {
+            case Some(predefined) =>
+              sb append predefined // stays part of the current text run
+            case None =>
+              if (!sb.isEmpty) {  // flush buffer
+                nb += text(sb.toString())
+                sb.clear()
+              }
+              nb += entityRef(ref) // add entityref
+          }
+        }
+      }
+      else sb append c
+    }
+
+    if(!sb.isEmpty) // flush buffer
+      nb += text(sb.toString())
+
+    nb.toList
+  }
+
+  /**
+   * {{{
+   *   CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+   *             | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+   * }}}
+   * See [66]
+   *
+   * Expects `ch()` to be the first character after "&#" and stops with `ch()`
+   * on the closing `;` (which it does not consume).
+   *
+   * @return the referenced code point as a String
+   */
+  def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = {
+    val hex  = (ch() == 'x') && { nextch(); true }
+    val base = if (hex) 16 else 10
+    var i = 0
+    while (ch() != ';') {
+      ch() match {
+        case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+          i = i * base + ch().asDigit
+        case 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
+           | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
+          if (! hex)
+            reportSyntaxError("hex char not allowed in decimal char ref\n" +
+                              "Did you mean to write &#x ?")
+          else
+            i = i * base + ch().asDigit
+        case SU =>
+          reportTruncatedError("")
+        case _ =>
+          reportSyntaxError("character '" + ch() + "' not allowed in char ref\n")
+      }
+      nextch()
+    }
+    new String(Array(i), 0, 1)
+  }
+
+  /** {{{
+   *  (#x20 | #x9 | #xD | #xA)
+   *  }}} */
+  final def isSpace(ch: Char): Boolean = ch match {
+    case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true
+    case _                                         => false
+  }
+  /** {{{
+   *  (#x20 | #x9 | #xD | #xA)+
+   *  }}} */
+  final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace)
+
+  /** {{{
+   *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+   *             | CombiningChar | Extender
+   *  }}}
+   *  See [4] and Appendix B of XML 1.0 specification.
+  */
+  def isNameChar(ch: Char): Boolean = {
+    import java.lang.Character._
+    // The constants represent groups Mc, Me, Mn, Lm, and Nd.
+
+    isNameStart(ch) || (getType(ch).toByte match {
+      case COMBINING_SPACING_MARK |
+              ENCLOSING_MARK | NON_SPACING_MARK |
+              MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
+      case _                                        => ".-:" contains ch
+    })
+  }
+
+  /** {{{
+   *  NameStart ::= ( Letter | '_' )
+   *  }}}
+   *  where Letter means in one of the Unicode general
+   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
+   *
+   *  We do not allow a name to start with `:`.
+   *  See [3] and Appendix B of XML 1.0 specification
+  */
+  def isNameStart(ch: Char): Boolean = {
+    import java.lang.Character._
+
+    getType(ch).toByte match {
+      case LOWERCASE_LETTER |
+              UPPERCASE_LETTER | OTHER_LETTER |
+              TITLECASE_LETTER | LETTER_NUMBER => true
+      case _                                  => ch == '_'
+    }
+  }
+
+  /** {{{
+   *  Name ::= ( Letter | '_' ) (NameChar)*
+   *  }}}
+   *  See [5] of XML 1.0 specification.
+   */
+  def isName(s: String): Boolean =
+    s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar)
+
+}