diff options
-rw-r--r-- | src/dotty/tools/dotc/parsing/MarkupParserCommon.scala | 11 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/MarkupParsers.scala | 26 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/Parsers.scala | 4 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/Scanners.scala | 8 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/ScriptParsers.scala | 14 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/SymbolicXMLBuilder.scala | 7 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/Tokens.scala | 2 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/Utility.scala | 169 |
8 files changed, 196 insertions, 45 deletions
diff --git a/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala index 5b51e9ed6..ce2c41797 100644 --- a/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala +++ b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala @@ -8,21 +8,16 @@ package dotty.tools.dotc package parsing -import scala.xml._ -import scala.xml.parsing._ +import Utility._ +import scala.reflect.internal.Chars.SU -import scala.io.Source -import scala.xml.dtd._ -import scala.annotation.switch -import Utility.Escapes.{ pairs => unescape } -import Utility.SU /** This is not a public trait - it contains common code shared * between the library level XML parser and the compiler's. * All members should be accessed through those. */ -private[dotty] trait MarkupParserCommon extends TokenTests { +private[dotty] trait MarkupParserCommon { protected def unreachable = scala.sys.error("Cannot be reached.") // type HandleType // MarkupHandler, SymbolicXMLBuilder diff --git a/src/dotty/tools/dotc/parsing/MarkupParsers.scala b/src/dotty/tools/dotc/parsing/MarkupParsers.scala index 5d86121e0..3afaf171c 100644 --- a/src/dotty/tools/dotc/parsing/MarkupParsers.scala +++ b/src/dotty/tools/dotc/parsing/MarkupParsers.scala @@ -5,15 +5,12 @@ package parsing import scala.collection.mutable import mutable.{ Buffer, ArrayBuffer, ListBuffer } import scala.util.control.ControlThrowable -import util.SourceFile -import scala.xml.{ Text, TextBuffer } -import scala.xml.Utility.{ isNameStart, isNameChar, isSpace } -import scala.reflect.internal.Chars.{ SU, LF } +import scala.reflect.internal.Chars.SU import Parsers._ import util.Positions._ import core._ -import ast.Trees._ import Constants._ +import Utility._ // XXX/Note: many/most of the functions in here are almost direct cut and pastes @@ -50,7 +47,7 @@ object MarkupParsers { class MarkupParser(parser: Parser, final val preserveWS: Boolean) extends MarkupParserCommon { - import Tokens.{ EMPTY, LBRACE, RBRACE } + import Tokens.{ LBRACE, RBRACE } type PositionType = Position type InputType = CharArrayReader @@ -181,11 +178,20 @@ object MarkupParsers { } def appendText(pos: Position, ts: Buffer[Tree], txt: String): Unit = { - val toAppend = - if (preserveWS) Seq(txt) - else TextBuffer.fromString(txt).toText map (_.text) + def append(t: String) = ts append handle.text(pos, t) - toAppend foreach (t => ts append handle.text(pos, t)) + if (preserveWS) append(txt) + else { + val sb = new StringBuilder() + + txt foreach { c => + if (!isSpace(c)) sb append c + else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' + } + + val trimmed = sb.toString.trim + if (!trimmed.isEmpty) append(trimmed) + } } /** adds entity/character to ts as side-effect diff --git a/src/dotty/tools/dotc/parsing/Parsers.scala b/src/dotty/tools/dotc/parsing/Parsers.scala index 7e0910e1b..aea019bed 100644 --- a/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/src/dotty/tools/dotc/parsing/Parsers.scala @@ -13,14 +13,10 @@ import Flags._ import Contexts._ import Names._ import ast.Trees._ -import ast.TreeInfo import Decorators._ import StdNames._ import util.Positions._ -import Types._ import Constants._ -import NameOps._ -import util.Chars._ import ScriptParsers._ import annotation.switch import util.DotClass diff --git a/src/dotty/tools/dotc/parsing/Scanners.scala b/src/dotty/tools/dotc/parsing/Scanners.scala index c4ee199c6..4d8fdd10d 100644 --- a/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/src/dotty/tools/dotc/parsing/Scanners.scala @@ -2,7 +2,6 @@ package dotty.tools package dotc package parsing -import Tokens._ import core.Names._, core.Contexts._, core.Decorators._, util.Positions._ import core.StdNames._ import util.SourceFile @@ -10,9 +9,10 @@ import java.lang.Character.isDigit import scala.reflect.internal.Chars._ import Tokens._ import scala.annotation.{ switch, tailrec } -import scala.collection.{ mutable, immutable } -import mutable.{ ListBuffer, ArrayBuffer } -import scala.xml.Utility.isNameStart +import scala.collection.mutable +import mutable.ListBuffer +import Utility.isNameStart + object Scanners { diff --git a/src/dotty/tools/dotc/parsing/ScriptParsers.scala b/src/dotty/tools/dotc/parsing/ScriptParsers.scala index bd66c252d..8b5c51c34 100644 --- a/src/dotty/tools/dotc/parsing/ScriptParsers.scala +++ b/src/dotty/tools/dotc/parsing/ScriptParsers.scala @@ -2,21 +2,9 @@ package dotty.tools package dotc package parsing -import util.{ SourceFile, FreshNameCreator } +import util.SourceFile import core._ -import Flags._ import Contexts._ -import Names._ -import ast.Trees._ -import Decorators._ -import StdNames._ -import util.Chars.isScalaLetter -import util.Positions._ -import Types._ -import Constants._ -import NameOps._ -import scala.reflect.internal.Chars._ -import annotation.switch import Parsers._ diff --git a/src/dotty/tools/dotc/parsing/SymbolicXMLBuilder.scala b/src/dotty/tools/dotc/parsing/SymbolicXMLBuilder.scala index b497ffe9c..138172012 100644 --- a/src/dotty/tools/dotc/parsing/SymbolicXMLBuilder.scala +++ b/src/dotty/tools/dotc/parsing/SymbolicXMLBuilder.scala @@ -2,12 +2,11 @@ package dotty.tools package dotc package parsing -import scala.collection.{ mutable, immutable } +import scala.collection.mutable import scala.xml.{ EntityRef, Text } -import scala.xml.XML.{ xmlns } import core._ import Flags.Mutable -import Names._, NameOps._, StdNames._, Decorators._, ast.Trees._, ast.{tpd, untpd}, Constants._ +import Names._, StdNames._, ast.Trees._, ast.{tpd, untpd} import Symbols._, Contexts._ import util.Positions._ import Parsers.Parser @@ -203,7 +202,7 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(implicit ctx: Cont /** Extract all the namespaces from the attribute map. */ val namespaces: List[Tree] = - for (z <- attrMap.keys.toList ; if z startsWith xmlns) yield { + for (z <- attrMap.keys.toList ; if z startsWith "xmlns") yield { val ns = splitPrefix(z) match { case (Some(_), rest) => rest case _ => null diff --git a/src/dotty/tools/dotc/parsing/Tokens.scala b/src/dotty/tools/dotc/parsing/Tokens.scala index 73518c29f..09124d0d1 100644 --- a/src/dotty/tools/dotc/parsing/Tokens.scala +++ b/src/dotty/tools/dotc/parsing/Tokens.scala @@ -2,9 +2,7 @@ package dotty.tools package dotc package parsing -import collection.mutable import collection.immutable.BitSet -import scala.annotation.switch object Tokens { diff --git a/src/dotty/tools/dotc/parsing/Utility.scala b/src/dotty/tools/dotc/parsing/Utility.scala new file mode 100644 index 000000000..44ed268e1 --- /dev/null +++ b/src/dotty/tools/dotc/parsing/Utility.scala @@ -0,0 +1,169 @@ +package dotty.tools.dotc.parsing + +import scala.collection.mutable + + +/** + * The `Utility` object provides utility functions for processing instances + * of bound and not bound XML classes, as well as escaping text nodes. + * + * @author Burak Emir + */ +object Utility { + import scala.reflect.internal.Chars.SU + + private val unescMap = Map( + "lt" -> '<', + "gt" -> '>', + "amp" -> '&', + "quot" -> '"', + "apos" -> '\'' + ) + + /** + * Appends unescaped string to `s`, `amp` becomes `&`, + * `lt` becomes `<` etc.. + * + * @return `'''null'''` if `ref` was not a predefined entity. + */ + private final def unescape(ref: String, s: StringBuilder): StringBuilder = + ((unescMap get ref) map (s append _)).orNull + + def parseAttributeValue[T](value: String, text: String => T, entityRef: String => T): List[T] = { + val sb = new StringBuilder + var rfb: StringBuilder = null + val nb = new mutable.ListBuffer[T]() + + val it = value.iterator + while (it.hasNext) { + var c = it.next() + // entity! flush buffer into text node + if (c == '&') { + c = it.next() + if (c == '#') { + c = it.next() + val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)}) + sb.append(theChar) + } + else { + if (rfb eq null) rfb = new StringBuilder() + rfb append c + c = it.next() + while (c != ';') { + rfb.append(c) + c = it.next() + } + val ref = rfb.toString() + rfb.clear() + unescape(ref,sb) match { + case null => + if (!sb.isEmpty) { // flush buffer + nb += text(sb.toString()) + sb.clear() + } + nb += entityRef(ref) // add entityref + case _ => + } + } + } + else sb append c + } + + if(!sb.isEmpty) // flush buffer + nb += text(sb.toString()) + + nb.toList + } + + /** + * {{{ + * CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" + * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" + * }}} + * See [66] + */ + def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = { + val hex = (ch() == 'x') && { nextch(); true } + val base = if (hex) 16 else 10 + var i = 0 + while (ch() != ';') { + ch() match { + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + i = i * base + ch().asDigit + case 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' => + if (! hex) + reportSyntaxError("hex char not allowed in decimal char ref\n" + + "Did you mean to write &#x ?") + else + i = i * base + ch().asDigit + case SU => + reportTruncatedError("") + case _ => + reportSyntaxError("character '" + ch() + "' not allowed in char ref\n") + } + nextch() + } + new String(Array(i), 0, 1) + } + + /** {{{ + * (#x20 | #x9 | #xD | #xA) + * }}} */ + final def isSpace(ch: Char): Boolean = ch match { + case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true + case _ => false + } + /** {{{ + * (#x20 | #x9 | #xD | #xA)+ + * }}} */ + final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace) + + /** {{{ + * NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' + * | CombiningChar | Extender + * }}} + * See [4] and Appendix B of XML 1.0 specification. + */ + def isNameChar(ch: Char) = { + import java.lang.Character._ + // The constants represent groups Mc, Me, Mn, Lm, and Nd. + + isNameStart(ch) || (getType(ch).toByte match { + case COMBINING_SPACING_MARK | + ENCLOSING_MARK | NON_SPACING_MARK | + MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true + case _ => ".-:" contains ch + }) + } + + /** {{{ + * NameStart ::= ( Letter | '_' ) + * }}} + * where Letter means in one of the Unicode general + * categories `{ Ll, Lu, Lo, Lt, Nl }`. + * + * We do not allow a name to start with `:`. + * See [3] and Appendix B of XML 1.0 specification + */ + def isNameStart(ch: Char) = { + import java.lang.Character._ + + getType(ch).toByte match { + case LOWERCASE_LETTER | + UPPERCASE_LETTER | OTHER_LETTER | + TITLECASE_LETTER | LETTER_NUMBER => true + case _ => ch == '_' + } + } + + /** {{{ + * Name ::= ( Letter | '_' ) (NameChar)* + * }}} + * See [5] of XML 1.0 specification. + */ + def isName(s: String) = + s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar) + +} + |