summaryrefslogtreecommitdiff
path: root/src/library
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2009-12-17 20:36:00 +0000
committerPaul Phillips <paulp@improving.org>2009-12-17 20:36:00 +0000
commit05363648a6f97f3a2175200484bb46df9f9548d5 (patch)
tree63a01b670f3545b0a37c1ebb6126ab56f0c66268 /src/library
parenta25195fc1f025c0c9515b10af20c9327b0733ffa (diff)
downloadscala-05363648a6f97f3a2175200484bb46df9f9548d5.tar.gz
scala-05363648a6f97f3a2175200484bb46df9f9548d5.tar.bz2
scala-05363648a6f97f3a2175200484bb46df9f9548d5.zip
Began the process of consolidating all the code...
Began the process of consolidating all the code which is painfully duplicated between MarkupParsers and MarkupParser. This motivated by the reappearance of bug #2354 in the exact same form as the one I already fixed. no review.
Diffstat (limited to 'src/library')
-rw-r--r--src/library/scala/xml/parsing/MarkupParser.scala234
-rw-r--r--src/library/scala/xml/parsing/MarkupParserCommon.scala51
2 files changed, 112 insertions, 173 deletions
diff --git a/src/library/scala/xml/parsing/MarkupParser.scala b/src/library/scala/xml/parsing/MarkupParser.scala
index 7219051dab..ff19bd3ed5 100644
--- a/src/library/scala/xml/parsing/MarkupParser.scala
+++ b/src/library/scala/xml/parsing/MarkupParser.scala
@@ -6,9 +6,6 @@
** |/ **
\* */
-// $Id$
-
-
package scala.xml
package parsing
@@ -30,10 +27,14 @@ import Utility.Escapes.{ pairs => unescape }
* @author Burak Emir
* @version 1.0
*/
-trait MarkupParser extends AnyRef with TokenTests
+trait MarkupParser extends MarkupParserCommon with TokenTests
{
self: MarkupParser with MarkupHandler =>
+ type PositionType = Int
+
+ def xHandleError(that: Char, msg: String) = reportSyntaxError(msg)
+
val input: Source
/** if true, does not remove surplus whitespace */
@@ -239,8 +240,6 @@ trait MarkupParser extends AnyRef with TokenTests
/** append Unicode character to name buffer*/
protected def putChar(c: Char) = cbuf.append(c)
- //var xEmbeddedBlock = false;
-
/** As the current code requires you to call nextch once manually
* after construction, this method formalizes that suboptimal reality.
*/
@@ -250,7 +249,7 @@ trait MarkupParser extends AnyRef with TokenTests
}
/** this method assign the next character to ch and advances in input */
- def nextch {
+ def nextch = {
if (curInput.hasNext) {
ch = curInput.next
pos = curInput.pos
@@ -265,23 +264,9 @@ trait MarkupParser extends AnyRef with TokenTests
ch = 0.asInstanceOf[Char]
}
}
+ ch
}
- //final val enableEmbeddedExpressions: Boolean = false;
-
- /** munch expected XML token, report syntax error for unexpected
- */
- def xToken(that: Char) {
- if (ch == that)
- nextch
- else {
- reportSyntaxError("'" + that + "' expected instead of '" + ch + "'")
- error("FATAL")
- }
- }
-
- def xToken(that: Seq[Char]): Unit = that foreach xToken
-
/** parse attribute and create namespace scope, metadata
* [41] Attributes ::= { S Name Eq AttValue }
*/
@@ -469,42 +454,32 @@ trait MarkupParser extends AnyRef with TokenTests
def content(pscope: NamespaceBinding): NodeSeq = {
var ts = new NodeBuffer
var exit = eof
- while (! exit) {
- //Console.println("in content, ch = '"+ch+"' line="+scala.io.Position.line(pos));
- /* if( xEmbeddedBlock ) {
- ts.append( xEmbeddedExpr );
- } else {*/
- tmppos = pos;
- exit = eof;
- if(!eof)
- ch match {
- case '<' => // another tag
- //Console.println("before ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos);
- nextch;
- //Console.println("after ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos);
-
- if('/' ==ch)
- exit = true; // end tag
- else
- content1(pscope, ts)
- //case '{' =>
-/* if( xCheckEmbeddedBlock ) {
- ts.appendAll(xEmbeddedExpr);
- } else {*/
- // val str = new StringBuilder("{");
- // str.append(xText);
- // appendText(tmppos, ts, str.toString());
- /*}*/
- // postcond: xEmbeddedBlock == false!
- case '&' => // EntityRef or CharRef
- nextch;
- if (ch == '#') { // CharacterRef
+ // todo: optimize seq repr.
+ def done = new NodeSeq { val theSeq = ts.toList }
+
+ while (!exit) {
+ tmppos = pos
+ exit = eof
+
+ if (eof)
+ return done
+
+ ch match {
+ case '<' => // another tag
+ nextch match {
+ case '/' => exit = true // end tag
+ case _ => content1(pscope, ts)
+ }
+
+ // postcond: xEmbeddedBlock == false!
+ case '&' => // EntityRef or CharRef
+ nextch match {
+ case '#' => // CharacterRef
nextch
val theChar = handle.text(tmppos, xCharRef(() => ch, () => nextch))
xToken(';');
ts &+ theChar
- }
- else { // EntityRef
+ case _ => // EntityRef
val n = xName
xToken(';')
@@ -512,17 +487,12 @@ trait MarkupParser extends AnyRef with TokenTests
handle.entityRef(tmppos, n)
ts &+ unescape(n)
} else push(n)
- }
- case _ => // text content
- appendText(tmppos, ts, xText);
}
- /*}*/
- }
- val list = ts.toList
- // 2do: optimize seq repr.
- new NodeSeq {
- val theSeq = list
+ case _ => // text content
+ appendText(tmppos, ts, xText);
+ }
}
+ done
} // content(NamespaceBinding)
/** externalID ::= SYSTEM S syslit
@@ -572,47 +542,17 @@ trait MarkupParser extends AnyRef with TokenTests
if ((null != extID) && isValidating) {
pushExternal(extID.systemId)
- //val extSubsetSrc = externalSource( extID.systemId );
-
extIndex = inpStack.length
- /*
- .indexOf(':') != -1) { // assume URI
- Source.fromFile(new java.net.URI(extID.systemLiteral));
- } else {
- Source.fromFile(extID.systemLiteral);
- }
- */
- //Console.println("I'll print it now");
- //val old = curInput;
- //tmppos = curInput.pos;
- //val oldch = ch;
- //curInput = extSubsetSrc;
- //pos = 0;
- //nextch;
extSubset()
-
pop()
-
extIndex = -1
-
- //curInput = old;
- //pos = curInput.pos;
- //ch = curInput.ch;
- //eof = false;
- //while(extSubsetSrc.hasNext)
- //Console.print(extSubsetSrc.next);
-
- //Console.println("returned from external, current ch = "+ch )
}
if ('[' == ch) { // internal subset
nextch
/* TODO */
- //Console.println("hello");
intSubset()
- //while(']' != ch)
- // nextch;
// TODO: do the DTD parsing?? ?!?!?!?!!
xToken(']')
xSpaceOpt
@@ -639,15 +579,14 @@ trait MarkupParser extends AnyRef with TokenTests
*/
def element1(pscope: NamespaceBinding): NodeSeq = {
val pos = this.pos
- val Tuple3(qname, aMap, scope) = xTag(pscope)
- val Tuple2(pre, local) = Utility.prefix(qname) match {
- case Some(p) => (p,qname.substring(p.length+1, qname.length))
- case _ => (null,qname)
+ val (qname, aMap, scope) = xTag(pscope)
+ val (pre, local) = Utility.prefix(qname) match {
+ case Some(p) => (p, qname drop p.length)
+ case _ => (null, qname)
}
val ts = {
if (ch == '/') { // empty element
- xToken('/')
- xToken('>')
+ xToken("/>")
handle.elemStart(pos, pre, local, aMap, scope)
NodeSeq.Empty
}
@@ -685,17 +624,6 @@ trait MarkupParser extends AnyRef with TokenTests
}
}
- /** scan [S] '=' [S]*/
- def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt }
-
- /** skip optional space S? */
- def xSpaceOpt = while (isSpace(ch) && !eof) { nextch; }
-
- /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
- def xSpace =
- if (isSpace(ch)) { nextch; xSpaceOpt }
- else reportSyntaxError("whitespace expected")
-
/** '&lt;?' ProcInstr ::= Name [S ({Char} - ({Char}'&gt;?' {Char})]'?&gt;'
*
* see [15]
@@ -715,8 +643,7 @@ trait MarkupParser extends AnyRef with TokenTests
nextch
}
};
- xToken('?')
- xToken('>')
+ xToken("?>")
handle.procInstr(tmppos, n, sb.toString)
}
@@ -724,28 +651,17 @@ trait MarkupParser extends AnyRef with TokenTests
* precondition: xEmbeddedBlock == false (we are not in a scala block)
*/
def xText: String = {
- //if( xEmbeddedBlock ) throw FatalError("internal error: encountered embedded block"); // assert
-
- /*if( xCheckEmbeddedBlock )
- return ""
- else {*/
- //Console.println("in xText! ch = '"+ch+"'");
- var exit = false;
- while (! exit) {
- //Console.println("LOOP in xText! ch = '"+ch+"' + pos="+pos);
- putChar(ch);
- val opos = pos;
- nextch;
-
- //Console.println("STILL LOOP in xText! ch = '"+ch+"' + pos="+pos+" opos="+opos);
-
+ var exit = false;
+ while (! exit) {
+ putChar(ch);
+ val opos = pos;
+ nextch;
- exit = eof || /*{ nextch; xCheckEmbeddedBlock }||*/( ch == '<' ) || ( ch == '&' );
- }
- val str = cbuf.toString();
- cbuf.length = 0;
- str
- /*}*/
+ exit = eof || ( ch == '<' ) || ( ch == '&' )
+ }
+ val str = cbuf.toString();
+ cbuf.length = 0;
+ str
}
/** attribute value, terminated by either ' or ". value may not contain &lt;.
@@ -767,7 +683,6 @@ trait MarkupParser extends AnyRef with TokenTests
str
}
-
/* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
def pubidLiteral(): String = {
val endch = ch
@@ -846,34 +761,16 @@ trait MarkupParser extends AnyRef with TokenTests
val ent = xName
xToken(';')
xSpaceOpt
- /*
- Console.println("hello, pushing!");
- {
- val test = replacementText(ent);
- while(test.hasNext)
- Console.print(test.next);
- } */
+
push(ent)
xSpaceOpt
- //Console.println("hello, getting name");
val stmt = xName
- //Console.println("hello, got name");
xSpaceOpt
- //Console.println("how can we be eof = "+eof);
-
- // eof = true because not external?!
- //if(!eof)
- // error("expected only INCLUDE or IGNORE");
- //pop();
-
- //Console.println("hello, popped");
stmt match {
// parameter entity
- case "INCLUDE" =>
- doInclude()
- case "IGNORE" =>
- doIgnore()
+ case "INCLUDE" => doInclude()
+ case "IGNORE" => doIgnore()
}
case 'I' =>
nextch
@@ -958,11 +855,10 @@ trait MarkupParser extends AnyRef with TokenTests
val n = xName
xSpace
var attList: List[AttrDecl] = Nil
+
// later: find the elemDecl for n
while ('>' != ch) {
val aname = xName
- //Console.println("attribute name: "+aname);
- var defdecl: DefaultDecl = null
xSpace
// could be enumeration (foo,bar) parse this later :-/
while ('"' != ch && '\'' != ch && '#' != ch && '<' != ch) {
@@ -972,29 +868,24 @@ trait MarkupParser extends AnyRef with TokenTests
}
val atpe = cbuf.toString()
cbuf.length = 0
- //Console.println("attr type: "+atpe);
- ch match {
+
+ val defdecl: DefaultDecl = ch match {
case '\'' | '"' =>
- val defValue = xAttributeValue() // default value
- defdecl = DEFAULT(false, defValue)
+ DEFAULT(false, xAttributeValue())
case '#' =>
nextch
xName match {
- case "FIXED" =>
- xSpace
- val defValue = xAttributeValue() // default value
- defdecl = DEFAULT(true, defValue)
- case "IMPLIED" =>
- defdecl = IMPLIED
- case "REQUIRED" =>
- defdecl = REQUIRED
+ case "FIXED" => xSpace ; DEFAULT(true, xAttributeValue())
+ case "IMPLIED" => IMPLIED
+ case "REQUIRED" => REQUIRED
}
case _ =>
+ null
}
xSpaceOpt
- attList = AttrDecl(aname, atpe, defdecl) :: attList
+ attList ::= AttrDecl(aname, atpe, defdecl)
cbuf.length = 0
}
nextch
@@ -1086,9 +977,6 @@ trait MarkupParser extends AnyRef with TokenTests
def reportValidationError(pos: Int, str: String): Unit = reportSyntaxError(pos, str)
def push(entityName: String) {
- //Console.println("BEFORE PUSHING "+ch)
- //Console.println("BEFORE PUSHING "+pos)
- //Console.print("[PUSHING "+entityName+"]")
if (!eof)
inpStack = curInput :: inpStack
diff --git a/src/library/scala/xml/parsing/MarkupParserCommon.scala b/src/library/scala/xml/parsing/MarkupParserCommon.scala
new file mode 100644
index 0000000000..c4ba2ccf15
--- /dev/null
+++ b/src/library/scala/xml/parsing/MarkupParserCommon.scala
@@ -0,0 +1,51 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2010, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala.xml
+package parsing
+
+import scala.io.Source
+import scala.xml.dtd._
+import Utility.Escapes.{ pairs => unescape }
+
+/** This is not a public trait - it contains common code shared
+ * between the library level XML parser and the compiler's.
+ * All members should be accessed through those.
+ */
+private[scala] trait MarkupParserCommon extends TokenTests {
+ // type InputType // Source, CharArrayReader
+ // type HandleType // MarkupHandler, SymbolicXMLBuilder
+ // type PositionType // Int, Position
+
+ def ch: Char
+ def nextch: Char
+ def xHandleError(that: Char, msg: String): Unit
+ def reportSyntaxError(str: String): Unit
+ def reportSyntaxError(pos: Int, str: String): Unit
+ def eof: Boolean
+
+ def xToken(that: Char) {
+ if (ch == that) nextch
+ else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+ }
+ def xToken(that: Seq[Char]) { that foreach xToken }
+
+ /** scan [S] '=' [S]*/
+ def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt }
+
+ /** skip optional space S? */
+ def xSpaceOpt = while (isSpace(ch) && !eof) nextch
+
+ /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
+ def xSpace =
+ if (isSpace(ch)) { nextch; xSpaceOpt }
+ else xHandleError(ch, "whitespace expected")
+
+ //
+ def returning[T](x: T)(f: T => Unit): T = { f(x) ; x }
+}