summaryrefslogtreecommitdiff
path: root/src/library
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2010-01-18 21:18:36 +0000
committerPaul Phillips <paulp@improving.org>2010-01-18 21:18:36 +0000
commit135d4f06b174aa585af64b5253aba647982ac4a2 (patch)
tree73ca84ae254f4903feef03f7573172f992ca7e99 /src/library
parente83ad1e005d40738f87da8bb2d60cf9035cfb6ca (diff)
downloadscala-135d4f06b174aa585af64b5253aba647982ac4a2.tar.gz
scala-135d4f06b174aa585af64b5253aba647982ac4a2.tar.bz2
scala-135d4f06b174aa585af64b5253aba647982ac4a2.zip
More work consolidating the XML code needlessly...
More work consolidating the XML code needlessly duplicated between the compiler and the library. Having to fix #2354 in two completely different places was, I found, very motivating.
Diffstat (limited to 'src/library')
-rw-r--r--src/library/scala/xml/parsing/MarkupParser.scala178
-rw-r--r--src/library/scala/xml/parsing/MarkupParserCommon.scala180
2 files changed, 192 insertions, 166 deletions
diff --git a/src/library/scala/xml/parsing/MarkupParser.scala b/src/library/scala/xml/parsing/MarkupParser.scala
index a15cd0f7e4..2f7f48c765 100644
--- a/src/library/scala/xml/parsing/MarkupParser.scala
+++ b/src/library/scala/xml/parsing/MarkupParser.scala
@@ -32,7 +32,13 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
self: MarkupParser with MarkupHandler =>
type PositionType = Int
- type InputType = Source
+ type InputType = Source
+ type ElementType = NodeSeq
+ type AttributesType = (MetaData, NamespaceBinding)
+ type NamespaceType = NamespaceBinding
+
+ def truncatedError(msg: String): Nothing = throw FatalError(msg)
+ def errorNoEnd(tag: String) = throw FatalError("expected closing tag of " + tag)
def xHandleError(that: Char, msg: String) = reportSyntaxError(msg)
@@ -106,8 +112,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
* // this is a bit more lenient than necessary...
*/
def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = {
-
- //Console.println("(DEBUG) prolog")
var n = 0
var info_ver: Option[String] = None
var info_enc: Option[String] = None
@@ -176,7 +180,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
if (m.length - n != 0) {
reportSyntaxError("VersionInfo EncodingDecl? or '?>' expected!");
}
- //Console.println("[MarkupParser::textDecl] finished parsing textdecl");
Tuple2(info_ver, info_enc);
}
@@ -190,8 +193,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
*/
def document(): Document = {
-
- //Console.println("(DEBUG) document")
doc = new Document()
this.dtd = null
@@ -204,7 +205,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
nextch // is prolog ?
var children: NodeSeq = null
if ('?' == ch) {
- //Console.println("[MarkupParser::document] starts with xml declaration");
nextch;
info_prolog = prolog()
doc.version = info_prolog._1
@@ -212,10 +212,8 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
doc.standAlone = info_prolog._3
children = content(TopScope) // DTD handled as side effect
- } else {
- //Console.println("[MarkupParser::document] does not start with xml declaration");
- //
-
+ }
+ else {
val ts = new NodeBuffer();
content1(TopScope, ts); // DTD handled as side effect
ts &+ content(TopScope);
@@ -257,6 +255,14 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
this
}
+ def ch_returning_nextch = { val res = ch ; nextch ; res }
+ def mkProcInstr(position: Int, name: String, text: String): NodeSeq =
+ handle.procInstr(position, name, text)
+
+ def mkAttributes(name: String, pscope: NamespaceBinding) =
+ if (isNameStart (ch)) xAttributes(pscope)
+ else (Null, pscope)
+
/** this method assign the next character to ch and advances in input */
def nextch = {
if (curInput.hasNext) {
@@ -315,27 +321,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
(aMap,scope)
}
- /** attribute value, terminated by either ' or ". value may not contain &lt;.
- * AttValue ::= `'` { _ } `'`
- * | `"` { _ } `"`
- */
- def xAttributeValue(): String = {
- val endch = ch
- nextch
- while (ch != endch) {
- if ('<' == ch)
- reportSyntaxError( "'<' not allowed in attrib value" );
- putChar(ch)
- nextch
- }
- nextch
- val str = cbuf.toString()
- cbuf.length = 0
-
- // well-formedness constraint
- normalizeAttributeValue(str)
- }
-
/** entity value, terminated by either ' or ". value may not contain &lt;.
* AttValue ::= `'` { _ } `'`
* | `"` { _ } `"`
@@ -353,35 +338,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
str
}
-
- /** parse a start or empty tag.
- * [40] STag ::= '&lt;' Name { S Attribute } [S]
- * [44] EmptyElemTag ::= '&lt;' Name { S Attribute } [S]
- */
- protected def xTag(pscope:NamespaceBinding): (String, MetaData, NamespaceBinding) = {
- val qname = xName
-
- xSpaceOpt
- val (aMap: MetaData, scope: NamespaceBinding) = {
- if (isNameStart(ch))
- xAttributes(pscope)
- else
- (Null, pscope)
- }
- (qname, aMap, scope)
- }
-
- /** [42] '&lt;' xmlEndTag ::= '&lt;' '/' Name S? '&gt;'
- */
- def xEndTag(n: String) = {
- xToken('/')
- val m = xName
- if (n != m)
- reportSyntaxError("expected closing tag of " + n/* +", not "+m*/);
- xSpaceOpt
- xToken('>')
- }
-
/** '&lt;! CharData ::= [CDATA[ ( {char} - {char}"]]&gt;"{char} ) ']]&gt;'
*
* see [15]
@@ -392,14 +348,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
xTakeUntil(mkResult, () => pos, "]]>")
}
- /** CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
- * | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
- *
- * see [66]
- */
- def xCharRef(ch: () => Char, nextch: () => Unit): String =
- Utility.parseCharRef(ch, nextch, reportSyntaxError _)
-
/** Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '--&gt;'
*
* see [15]
@@ -576,7 +524,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
*/
def element1(pscope: NamespaceBinding): NodeSeq = {
val pos = this.pos
- val (qname, aMap, scope) = xTag(pscope)
+ val (qname, (aMap, scope)) = xTag(pscope)
val (pre, local) = Utility.prefix(qname) match {
case Some(p) => (p, qname drop p.length+1)
case _ => (null, qname)
@@ -600,50 +548,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
res
}
- //def xEmbeddedExpr: MarkupType;
-
- /** Name ::= (Letter | '_' | ':') (NameChar)*
- *
- * see [5] of XML 1.0 specification
- */
- def xName: String = {
- if (isNameStart(ch)) {
- while (isNameChar(ch)) {
- putChar(ch)
- nextch
- }
- val n = cbuf.toString().intern()
- cbuf.length = 0
- n
- } else {
- reportSyntaxError("name expected")
- ""
- }
- }
-
- /** '&lt;?' ProcInstr ::= Name [S ({Char} - ({Char}'&gt;?' {Char})]'?&gt;'
- *
- * see [15]
- */
- def xProcInstr: NodeSeq = {
- val sb:StringBuilder = new StringBuilder()
- val n = xName
- if (isSpace(ch)) {
- xSpace
- while (true) {
- if (ch == '?' && { sb.append( ch ); nextch; ch == '>' }) {
- sb.length = sb.length - 1;
- nextch;
- return handle.procInstr(tmppos, n, sb.toString);
- } else
- sb.append(ch);
- nextch
- }
- };
- xToken("?>")
- handle.procInstr(tmppos, n, sb.toString)
- }
-
/** parse character data.
* precondition: xEmbeddedBlock == false (we are not in a scala block)
*/
@@ -996,50 +900,4 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
pos = curInput.pos
eof = false // must be false, because of places where entity refs occur
}
-
- /** for the moment, replace only character references
- * see spec 3.3.3
- * precond: cbuf empty
- */
- def normalizeAttributeValue(attval: String): String = {
- val s: Seq[Char] = attval
- val it = s.iterator
- while (it.hasNext) {
- it.next match {
- case ' '|'\t'|'\n'|'\r' =>
- cbuf.append(' ');
- case '&' => it.next match {
- case '#' =>
- var c = it.next
- val s = xCharRef ({ () => c }, { () => c = it.next })
- cbuf.append(s)
- case nchar =>
- val nbuf = new StringBuilder()
- var d = nchar
- do {
- nbuf.append(d)
- d = it.next
- } while(d != ';');
- nbuf.toString() match {
- case "lt" => cbuf.append('<')
- case "gt" => cbuf.append('>')
- case "amp" => cbuf.append('&')
- case "apos" => cbuf.append('\'')
- case "quot" => cbuf.append('"')
- case "quote" => cbuf.append('"')
- case name =>
- cbuf.append('&')
- cbuf.append(name)
- cbuf.append(';')
- }
- }
- case c =>
- cbuf.append(c)
- }
- }
- val name = cbuf.toString()
- cbuf.length = 0
- name
- }
-
}
diff --git a/src/library/scala/xml/parsing/MarkupParserCommon.scala b/src/library/scala/xml/parsing/MarkupParserCommon.scala
index 57c46c4685..ba1402d55f 100644
--- a/src/library/scala/xml/parsing/MarkupParserCommon.scala
+++ b/src/library/scala/xml/parsing/MarkupParserCommon.scala
@@ -11,30 +11,191 @@ package parsing
import scala.io.Source
import scala.xml.dtd._
+import scala.annotation.switch
import Utility.Escapes.{ pairs => unescape }
+object MarkupParserCommon {
+ final val SU = '\u001A'
+}
+import MarkupParserCommon._
+
/** This is not a public trait - it contains common code shared
* between the library level XML parser and the compiler's.
* All members should be accessed through those.
*/
private[scala] trait MarkupParserCommon extends TokenTests {
- private final val SU: Char = 0x1A
protected def unreachable = Predef.error("Cannot be reached.")
- // type HandleType // MarkupHandler, SymbolicXMLBuilder
-
+ // type HandleType // MarkupHandler, SymbolicXMLBuilder
type InputType // Source, CharArrayReader
type PositionType // Int, Position
+ type ElementType // NodeSeq, Tree
+ type NamespaceType // NamespaceBinding, Any
+ type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+ def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+ def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+ /** parse a start or empty tag.
+ * [40] STag ::= '<' Name { S Attribute } [S]
+ * [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+ */
+ protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+ val name = xName
+ xSpaceOpt
+
+ (name, mkAttributes(name, pscope))
+ }
+
+ /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'>?' {Char})]'?>'
+ *
+ * see [15]
+ */
+ def xProcInstr: ElementType = {
+ val n = xName
+ xSpaceOpt
+ xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>")
+ }
+
+ /** attribute value, terminated by either ' or ". value may not contain <.
+ * @param endch either ' or "
+ */
+ def xAttributeValue(endCh: Char): String = {
+ val buf = new StringBuilder
+ while (ch != endCh) {
+ // well-formedness constraint
+ if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
+ else if (ch == SU) truncatedError("")
+ else buf append ch_returning_nextch
+ }
+ ch_returning_nextch
+ // @todo: normalize attribute value
+ buf.toString
+ }
+
+ def xAttributeValue(): String = {
+ val str = xAttributeValue(ch_returning_nextch)
+ // well-formedness constraint
+ normalizeAttributeValue(str)
+ }
+
+ private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+ val buf = new StringBuilder
+ while (it.hasNext) it.next match {
+ case `end` => return buf.toString
+ case ch => buf append ch
+ }
+ error("Expected '%s'".format(end))
+ }
+
+ /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>'
+ */
+ def xEndTag(startName: String) {
+ xToken('/')
+ if (xName != startName)
+ errorNoEnd(startName)
+
+ xSpaceOpt
+ xToken('>')
+ }
+
+ /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen
+ * Name ::= (Letter | '_') (NameChar)*
+ *
+ * see [5] of XML 1.0 specification
+ *
+ * pre-condition: ch != ':' // assured by definition of XMLSTART token
+ * post-condition: name does neither start, nor end in ':'
+ */
+ def xName: String = {
+ if (ch == SU)
+ truncatedError("")
+ else if (!isNameStart(ch))
+ return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+
+ val buf = new StringBuilder
+
+ do buf append ch_returning_nextch
+ while (isNameChar(ch))
+
+ if (buf.last == ':') {
+ reportSyntaxError( "name cannot end in ':'" )
+ buf.toString dropRight 1
+ }
+ else buf.toString
+ }
+
+ private def attr_unescape(s: String) = s match {
+ case "lt" => "<"
+ case "gt" => ">"
+ case "amp" => "&"
+ case "apos" => "'"
+ case "quot" => "\""
+ case "quote" => "\""
+ case _ => "&" + s + ";"
+ }
+
+ /** Replaces only character references right now.
+ * see spec 3.3.3
+ */
+ private def normalizeAttributeValue(attval: String): String = {
+ val buf = new StringBuilder
+ val it = attval.iterator.buffered
+
+ while (it.hasNext) buf append (it.next match {
+ case ' ' | '\t' | '\n' | '\r' => " "
+ case '&' if it.head == '#' => it.next ; xCharRef(it)
+ case '&' => attr_unescape(takeUntilChar(it, ';'))
+ case c => c
+ })
+
+ buf.toString
+ }
+
+ /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+ * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+ *
+ * see [66]
+ */
+ def xCharRef(ch: () => Char, nextch: () => Unit): String =
+ Utility.parseCharRef(ch, nextch, reportSyntaxError _)
+
+ def xCharRef(it: Iterator[Char]): String = {
+ var c = it.next
+ Utility.parseCharRef(() => c, () => { c = it.next }, reportSyntaxError _)
+ }
+
+ def xCharRef: String = xCharRef(() => ch, () => nextch)
/** Create a lookahead reader which does not influence the input */
def lookahead(): BufferedIterator[Char]
+ /** The library and compiler parsers had the interesting distinction of
+ * different behavior for nextch (a function for which there are a total
+ * of two plausible behaviors, so we know the design space was fully
+ * explored.) One of them returned the value of nextch before the increment
+ * and one of them the new value. So to unify code we have to at least
+ * temporarily abstract over the nextchs.
+ */
def ch: Char
def nextch: Char
+ def ch_returning_nextch: Char
+ def eof: Boolean
+
+ // def handle: HandleType
+ var tmppos: PositionType
+
def xHandleError(that: Char, msg: String): Unit
def reportSyntaxError(str: String): Unit
def reportSyntaxError(pos: Int, str: String): Unit
- def eof: Boolean
+
+ def truncatedError(msg: String): Nothing
+ def errorNoEnd(tag: String): Nothing
+
+ protected def errorAndResult[T](msg: String, x: T): T = {
+ reportSyntaxError(msg)
+ x
+ }
def xToken(that: Char) {
if (ch == that) nextch
@@ -53,9 +214,16 @@ private[scala] trait MarkupParserCommon extends TokenTests {
if (isSpace(ch)) { nextch; xSpaceOpt }
else xHandleError(ch, "whitespace expected")
- //
+ /** Apply a function and return the passed value */
def returning[T](x: T)(f: T => Unit): T = { f(x) ; x }
+ /** Execute body with a variable saved and restored after execution */
+ def saving[A,B](getter: A, setter: (A) => Unit)(body: => B): B = {
+ val saved = getter
+ try body
+ finally setter(saved)
+ }
+
/** Take characters from input stream until given String "until"
* is seen. Once seen, the accumulated characters are passed
* along with the current Position to the supplied handler function.
@@ -73,7 +241,7 @@ private[scala] trait MarkupParserCommon extends TokenTests {
if (ch == head && peek(rest))
return handler(positioner(), sb.toString)
else if (ch == SU)
- xHandleError(ch, "") // throws TruncatedXML in compiler
+ truncatedError("") // throws TruncatedXML in compiler
sb append ch
nextch