diff options
author | Paul Phillips <paulp@improving.org> | 2009-12-17 20:36:00 +0000 |
---|---|---|
committer | Paul Phillips <paulp@improving.org> | 2009-12-17 20:36:00 +0000 |
commit | 05363648a6f97f3a2175200484bb46df9f9548d5 (patch) | |
tree | 63a01b670f3545b0a37c1ebb6126ab56f0c66268 /src | |
parent | a25195fc1f025c0c9515b10af20c9327b0733ffa (diff) | |
download | scala-05363648a6f97f3a2175200484bb46df9f9548d5.tar.gz scala-05363648a6f97f3a2175200484bb46df9f9548d5.tar.bz2 scala-05363648a6f97f3a2175200484bb46df9f9548d5.zip |
Began the process of consolidating all the code...
Began the process of consolidating all the code which is painfully
duplicated between MarkupParsers and MarkupParser. This was motivated by the
reappearance of bug #2354 in the exact same form as the one I already
fixed. no review.
Diffstat (limited to 'src')
3 files changed, 122 insertions, 196 deletions
diff --git a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala index 67e1cd8304..bd46d2219d 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala @@ -50,10 +50,17 @@ trait MarkupParsers import global._ - class MarkupParser(parser: UnitParser, final val preserveWS: Boolean) { + class MarkupParser(parser: UnitParser, final val preserveWS: Boolean) extends scala.xml.parsing.MarkupParserCommon { import Tokens.{ EMPTY, LBRACE, RBRACE } + type PositionType = Position + val eof = false + + def xHandleError(that: Char, msg: String) = + if (ch == SU) throw TruncatedXML + else reportSyntaxError(msg) + var input : CharArrayReader = _ import parser.{ symbXMLBuilder => handle, o2p, r2p } @@ -73,15 +80,6 @@ trait MarkupParsers finally setter(saved) } - /** munch expected XML token, report syntax error for unexpected. - * - * @param that ... - */ - def xToken(that: Char): Unit = - if (ch == that) nextch - else if (ch == SU) throw TruncatedXML - else reportSyntaxError("'%s' expected instead of '%s'".format(that, ch)) - private var debugLastStartElement = new mutable.Stack[(Int, String)] private def debugLastPos = debugLastStartElement.top._1 private def debugLastElem = debugLastStartElement.top._2 @@ -421,18 +419,6 @@ trait MarkupParsers buf.toString.intern } - /** scan [S] '=' [S]*/ - def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt } - - /** skip optional space S? */ - def xSpaceOpt = { while (isSpace(ch)) { nextch }} - - /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ - def xSpace = - if (isSpace(ch)) { nextch; xSpaceOpt } - else if (ch == SU) throw TruncatedXML - else reportSyntaxError("whitespace expected") - /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'>?' 
{Char})]'?>' * * see [15] @@ -558,8 +544,9 @@ trait MarkupParsers */ def xScalaPatterns: List[Tree] = escapeToScala(parser.patterns(true), "pattern") + def reportSyntaxError(pos: Int, str: String) = parser.syntaxError(pos, str) def reportSyntaxError(str: String) = { - parser.syntaxError(curOffset, "in XML literal: " + str) + reportSyntaxError(curOffset, "in XML literal: " + str) nextch } diff --git a/src/library/scala/xml/parsing/MarkupParser.scala b/src/library/scala/xml/parsing/MarkupParser.scala index 7219051dab..ff19bd3ed5 100644 --- a/src/library/scala/xml/parsing/MarkupParser.scala +++ b/src/library/scala/xml/parsing/MarkupParser.scala @@ -6,9 +6,6 @@ ** |/ ** \* */ -// $Id$ - - package scala.xml package parsing @@ -30,10 +27,14 @@ import Utility.Escapes.{ pairs => unescape } * @author Burak Emir * @version 1.0 */ -trait MarkupParser extends AnyRef with TokenTests +trait MarkupParser extends MarkupParserCommon with TokenTests { self: MarkupParser with MarkupHandler => + type PositionType = Int + + def xHandleError(that: Char, msg: String) = reportSyntaxError(msg) + val input: Source /** if true, does not remove surplus whitespace */ @@ -239,8 +240,6 @@ trait MarkupParser extends AnyRef with TokenTests /** append Unicode character to name buffer*/ protected def putChar(c: Char) = cbuf.append(c) - //var xEmbeddedBlock = false; - /** As the current code requires you to call nextch once manually * after construction, this method formalizes that suboptimal reality. 
*/ @@ -250,7 +249,7 @@ trait MarkupParser extends AnyRef with TokenTests } /** this method assign the next character to ch and advances in input */ - def nextch { + def nextch = { if (curInput.hasNext) { ch = curInput.next pos = curInput.pos @@ -265,23 +264,9 @@ trait MarkupParser extends AnyRef with TokenTests ch = 0.asInstanceOf[Char] } } + ch } - //final val enableEmbeddedExpressions: Boolean = false; - - /** munch expected XML token, report syntax error for unexpected - */ - def xToken(that: Char) { - if (ch == that) - nextch - else { - reportSyntaxError("'" + that + "' expected instead of '" + ch + "'") - error("FATAL") - } - } - - def xToken(that: Seq[Char]): Unit = that foreach xToken - /** parse attribute and create namespace scope, metadata * [41] Attributes ::= { S Name Eq AttValue } */ @@ -469,42 +454,32 @@ trait MarkupParser extends AnyRef with TokenTests def content(pscope: NamespaceBinding): NodeSeq = { var ts = new NodeBuffer var exit = eof - while (! exit) { - //Console.println("in content, ch = '"+ch+"' line="+scala.io.Position.line(pos)); - /* if( xEmbeddedBlock ) { - ts.append( xEmbeddedExpr ); - } else {*/ - tmppos = pos; - exit = eof; - if(!eof) - ch match { - case '<' => // another tag - //Console.println("before ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos); - nextch; - //Console.println("after ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos); - - if('/' ==ch) - exit = true; // end tag - else - content1(pscope, ts) - //case '{' => -/* if( xCheckEmbeddedBlock ) { - ts.appendAll(xEmbeddedExpr); - } else {*/ - // val str = new StringBuilder("{"); - // str.append(xText); - // appendText(tmppos, ts, str.toString()); - /*}*/ - // postcond: xEmbeddedBlock == false! - case '&' => // EntityRef or CharRef - nextch; - if (ch == '#') { // CharacterRef + // todo: optimize seq repr. 
+ def done = new NodeSeq { val theSeq = ts.toList } + + while (!exit) { + tmppos = pos + exit = eof + + if (eof) + return done + + ch match { + case '<' => // another tag + nextch match { + case '/' => exit = true // end tag + case _ => content1(pscope, ts) + } + + // postcond: xEmbeddedBlock == false! + case '&' => // EntityRef or CharRef + nextch match { + case '#' => // CharacterRef nextch val theChar = handle.text(tmppos, xCharRef(() => ch, () => nextch)) xToken(';'); ts &+ theChar - } - else { // EntityRef + case _ => // EntityRef val n = xName xToken(';') @@ -512,17 +487,12 @@ trait MarkupParser extends AnyRef with TokenTests handle.entityRef(tmppos, n) ts &+ unescape(n) } else push(n) - } - case _ => // text content - appendText(tmppos, ts, xText); } - /*}*/ - } - val list = ts.toList - // 2do: optimize seq repr. - new NodeSeq { - val theSeq = list + case _ => // text content + appendText(tmppos, ts, xText); + } } + done } // content(NamespaceBinding) /** externalID ::= SYSTEM S syslit @@ -572,47 +542,17 @@ trait MarkupParser extends AnyRef with TokenTests if ((null != extID) && isValidating) { pushExternal(extID.systemId) - //val extSubsetSrc = externalSource( extID.systemId ); - extIndex = inpStack.length - /* - .indexOf(':') != -1) { // assume URI - Source.fromFile(new java.net.URI(extID.systemLiteral)); - } else { - Source.fromFile(extID.systemLiteral); - } - */ - //Console.println("I'll print it now"); - //val old = curInput; - //tmppos = curInput.pos; - //val oldch = ch; - //curInput = extSubsetSrc; - //pos = 0; - //nextch; extSubset() - pop() - extIndex = -1 - - //curInput = old; - //pos = curInput.pos; - //ch = curInput.ch; - //eof = false; - //while(extSubsetSrc.hasNext) - //Console.print(extSubsetSrc.next); - - //Console.println("returned from external, current ch = "+ch ) } if ('[' == ch) { // internal subset nextch /* TODO */ - //Console.println("hello"); intSubset() - //while(']' != ch) - // nextch; // TODO: do the DTD parsing?? ?!?!?!?!! 
xToken(']') xSpaceOpt @@ -639,15 +579,14 @@ trait MarkupParser extends AnyRef with TokenTests */ def element1(pscope: NamespaceBinding): NodeSeq = { val pos = this.pos - val Tuple3(qname, aMap, scope) = xTag(pscope) - val Tuple2(pre, local) = Utility.prefix(qname) match { - case Some(p) => (p,qname.substring(p.length+1, qname.length)) - case _ => (null,qname) + val (qname, aMap, scope) = xTag(pscope) + val (pre, local) = Utility.prefix(qname) match { + case Some(p) => (p, qname drop p.length) + case _ => (null, qname) } val ts = { if (ch == '/') { // empty element - xToken('/') - xToken('>') + xToken("/>") handle.elemStart(pos, pre, local, aMap, scope) NodeSeq.Empty } @@ -685,17 +624,6 @@ trait MarkupParser extends AnyRef with TokenTests } } - /** scan [S] '=' [S]*/ - def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt } - - /** skip optional space S? */ - def xSpaceOpt = while (isSpace(ch) && !eof) { nextch; } - - /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ - def xSpace = - if (isSpace(ch)) { nextch; xSpaceOpt } - else reportSyntaxError("whitespace expected") - /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'>?' {Char})]'?>' * * see [15] @@ -715,8 +643,7 @@ trait MarkupParser extends AnyRef with TokenTests nextch } }; - xToken('?') - xToken('>') + xToken("?>") handle.procInstr(tmppos, n, sb.toString) } @@ -724,28 +651,17 @@ trait MarkupParser extends AnyRef with TokenTests * precondition: xEmbeddedBlock == false (we are not in a scala block) */ def xText: String = { - //if( xEmbeddedBlock ) throw FatalError("internal error: encountered embedded block"); // assert - - /*if( xCheckEmbeddedBlock ) - return "" - else {*/ - //Console.println("in xText! ch = '"+ch+"'"); - var exit = false; - while (! exit) { - //Console.println("LOOP in xText! ch = '"+ch+"' + pos="+pos); - putChar(ch); - val opos = pos; - nextch; - - //Console.println("STILL LOOP in xText! ch = '"+ch+"' + pos="+pos+" opos="+opos); - + var exit = false; + while (! 
exit) { + putChar(ch); + val opos = pos; + nextch; - exit = eof || /*{ nextch; xCheckEmbeddedBlock }||*/( ch == '<' ) || ( ch == '&' ); - } - val str = cbuf.toString(); - cbuf.length = 0; - str - /*}*/ + exit = eof || ( ch == '<' ) || ( ch == '&' ) + } + val str = cbuf.toString(); + cbuf.length = 0; + str } /** attribute value, terminated by either ' or ". value may not contain <. @@ -767,7 +683,6 @@ trait MarkupParser extends AnyRef with TokenTests str } - /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */ def pubidLiteral(): String = { val endch = ch @@ -846,34 +761,16 @@ trait MarkupParser extends AnyRef with TokenTests val ent = xName xToken(';') xSpaceOpt - /* - Console.println("hello, pushing!"); - { - val test = replacementText(ent); - while(test.hasNext) - Console.print(test.next); - } */ + push(ent) xSpaceOpt - //Console.println("hello, getting name"); val stmt = xName - //Console.println("hello, got name"); xSpaceOpt - //Console.println("how can we be eof = "+eof); - - // eof = true because not external?! 
- //if(!eof) - // error("expected only INCLUDE or IGNORE"); - //pop(); - - //Console.println("hello, popped"); stmt match { // parameter entity - case "INCLUDE" => - doInclude() - case "IGNORE" => - doIgnore() + case "INCLUDE" => doInclude() + case "IGNORE" => doIgnore() } case 'I' => nextch @@ -958,11 +855,10 @@ trait MarkupParser extends AnyRef with TokenTests val n = xName xSpace var attList: List[AttrDecl] = Nil + // later: find the elemDecl for n while ('>' != ch) { val aname = xName - //Console.println("attribute name: "+aname); - var defdecl: DefaultDecl = null xSpace // could be enumeration (foo,bar) parse this later :-/ while ('"' != ch && '\'' != ch && '#' != ch && '<' != ch) { @@ -972,29 +868,24 @@ trait MarkupParser extends AnyRef with TokenTests } val atpe = cbuf.toString() cbuf.length = 0 - //Console.println("attr type: "+atpe); - ch match { + + val defdecl: DefaultDecl = ch match { case '\'' | '"' => - val defValue = xAttributeValue() // default value - defdecl = DEFAULT(false, defValue) + DEFAULT(false, xAttributeValue()) case '#' => nextch xName match { - case "FIXED" => - xSpace - val defValue = xAttributeValue() // default value - defdecl = DEFAULT(true, defValue) - case "IMPLIED" => - defdecl = IMPLIED - case "REQUIRED" => - defdecl = REQUIRED + case "FIXED" => xSpace ; DEFAULT(true, xAttributeValue()) + case "IMPLIED" => IMPLIED + case "REQUIRED" => REQUIRED } case _ => + null } xSpaceOpt - attList = AttrDecl(aname, atpe, defdecl) :: attList + attList ::= AttrDecl(aname, atpe, defdecl) cbuf.length = 0 } nextch @@ -1086,9 +977,6 @@ trait MarkupParser extends AnyRef with TokenTests def reportValidationError(pos: Int, str: String): Unit = reportSyntaxError(pos, str) def push(entityName: String) { - //Console.println("BEFORE PUSHING "+ch) - //Console.println("BEFORE PUSHING "+pos) - //Console.print("[PUSHING "+entityName+"]") if (!eof) inpStack = curInput :: inpStack diff --git a/src/library/scala/xml/parsing/MarkupParserCommon.scala 
b/src/library/scala/xml/parsing/MarkupParserCommon.scala new file mode 100644 index 0000000000..c4ba2ccf15 --- /dev/null +++ b/src/library/scala/xml/parsing/MarkupParserCommon.scala @@ -0,0 +1,51 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2010, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.xml +package parsing + +import scala.io.Source +import scala.xml.dtd._ +import Utility.Escapes.{ pairs => unescape } + +/** This is not a public trait - it contains common code shared + * between the library level XML parser and the compiler's. + * All members should be accessed through those. + */ +private[scala] trait MarkupParserCommon extends TokenTests { + // type InputType // Source, CharArrayReader + // type HandleType // MarkupHandler, SymbolicXMLBuilder + // type PositionType // Int, Position + + def ch: Char + def nextch: Char + def xHandleError(that: Char, msg: String): Unit + def reportSyntaxError(str: String): Unit + def reportSyntaxError(pos: Int, str: String): Unit + def eof: Boolean + + def xToken(that: Char) { + if (ch == that) nextch + else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch)) + } + def xToken(that: Seq[Char]) { that foreach xToken } + + /** scan [S] '=' [S]*/ + def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt } + + /** skip optional space S? */ + def xSpaceOpt = while (isSpace(ch) && !eof) nextch + + /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ + def xSpace = + if (isSpace(ch)) { nextch; xSpaceOpt } + else xHandleError(ch, "whitespace expected") + + // + def returning[T](x: T)(f: T => Unit): T = { f(x) ; x } +} |