diff options
author | buraq <buraq@epfl.ch> | 2005-05-25 11:40:22 +0000 |
---|---|---|
committer | buraq <buraq@epfl.ch> | 2005-05-25 11:40:22 +0000 |
commit | 8bdf158f08081c4d5c99b1247e204c8766565ebd (patch) | |
tree | 146a8c1b2fdddb0095c9e38dcdf408c0cd51762d /sources | |
parent | 1af5b9aeedd8bbcc897077bd837b8b0827cf94f8 (diff) | |
download | scala-8bdf158f08081c4d5c99b1247e204c8766565ebd.tar.gz scala-8bdf158f08081c4d5c99b1247e204c8766565ebd.tar.bz2 scala-8bdf158f08081c4d5c99b1247e204c8766565ebd.zip |
improved handling of DTDs
Diffstat (limited to 'sources')
-rw-r--r-- | sources/scala/io/Source.scala | 23 | ||||
-rw-r--r-- | sources/scala/xml/dtd/DTD.scala | 26 | ||||
-rw-r--r-- | sources/scala/xml/dtd/ExternalID.scala | 30 | ||||
-rw-r--r-- | sources/scala/xml/parsing/ConstructingParser.scala | 39 | ||||
-rw-r--r-- | sources/scala/xml/parsing/MarkupHandler.scala | 44 | ||||
-rw-r--r-- | sources/scala/xml/parsing/MarkupParser.scala | 452 | ||||
-rw-r--r-- | sources/scala/xml/parsing/ValidatingMarkupHandler.scala | 11 |
7 files changed, 463 insertions, 162 deletions
diff --git a/sources/scala/io/Source.scala b/sources/scala/io/Source.scala index 3926655d27..65a2bcbff7 100644 --- a/sources/scala/io/Source.scala +++ b/sources/scala/io/Source.scala @@ -58,6 +58,11 @@ object Source { def fromFile(name: String, enc: String): Source = fromFile( new File( name ), enc); + /** creates Source from file with given file: URI + */ + def fromFile(uri: java.net.URI): Source = + fromFile(new File(uri)); + /** creates Source from file, using default character encoding, setting its * description to filename. */ @@ -66,12 +71,7 @@ object Source { val is = new FileInputStream( file ); is.read( arr ); val s = fromBytes(arr); - s.descr = new StringBuffer() - .append( file.getAbsolutePath() ) - .append( File.pathSeparator ) - .append( file.getName() ) - .toString(); - s + return setFileDescriptor(file,s); } /** creates Source from file, using given character encoding, setting its @@ -83,8 +83,17 @@ object Source { is.read( arr ); val s = fromBytes(arr, enc); s.descr = file.getName(); + return setFileDescriptor(file,s); + } + + def setFileDescriptor(file: File, s: Source): Source = { + s.descr = new StringBuffer() + .append( "file:" ) + .append( file.getAbsolutePath() ) + .toString(); s } + } /** an iterable representation of source files. @@ -113,7 +122,7 @@ abstract class Source extends Iterator[Char] { */ var ch: Char = _; - /** description of this source */ + /** description of this source, default empty */ var descr: String = ""; var nerrors = 0; diff --git a/sources/scala/xml/dtd/DTD.scala b/sources/scala/xml/dtd/DTD.scala index 2b13209c39..9cf55f7077 100644 --- a/sources/scala/xml/dtd/DTD.scala +++ b/sources/scala/xml/dtd/DTD.scala @@ -12,14 +12,11 @@ abstract class DTD { def unparsedEntities: Seq[EntityDecl] = Nil; - var elem: Map[String, ElemDecl] = - new HashMap[String, ElemDecl](); + var elem: Map[String, ElemDecl] = new HashMap[String, ElemDecl](); - var attr: Map[String, AttListDecl] = - new HashMap[String, AttListDecl](); + var attr: Map[String, AttListDecl] = new HashMap[String, AttListDecl](); - var ent: Map[String, EntityDecl] = - new HashMap[String, EntityDecl](); + var ent: Map[String, EntityDecl] = new HashMap[String, EntityDecl](); var decls: List[Decl] = Nil; @@ -37,24 +34,15 @@ abstract class DTD { sb.append("]").toString() } + /* def initializeEntities() = { for(val x <- decls) x match { - case y @ ParsedEntityDecl(name, _) => ent.update(name, y); + case y @ ParsedEntityDecl(name, _) => ent.update(name, y); case y @ UnparsedEntityDecl(name, _, _) => ent.update(name, y); - case y @ ParameterEntityDecl(name, _) => ent.update(name, y); + case y @ ParameterEntityDecl(name, _) => ent.update(name, y); case _ => } } - - def replacementText( entityName: String ): Source = { - ent.get(entityName) match { - case Some(ParsedEntityDecl(_, IntDef(value))) => - Source.fromString(value); - case Some(_) => - Source.fromString("<!-- "+entityName+"; -->"); - case None => - Source.fromString("<!-- unknown entity "+entityName+"; -->") - } - } + */ } diff --git a/sources/scala/xml/dtd/ExternalID.scala b/sources/scala/xml/dtd/ExternalID.scala index 5263bea0d8..965d330a99 100644 --- a/sources/scala/xml/dtd/ExternalID.scala +++ b/sources/scala/xml/dtd/ExternalID.scala @@ -24,6 +24,8 @@ abstract class ExternalID { /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */ def toString(sb: StringBuffer): StringBuffer; + def systemId: String; + } /** a system identifier @@ -32,18 +34,18 @@ abstract class ExternalID { * @param systemLiteral the system identifier literal **/ -case class SystemID( systemLiteral:String ) extends ExternalID with parsing.TokenTests{ +case class SystemID( systemId:String ) extends ExternalID with parsing.TokenTests{ - if( !checkSysID( systemLiteral ) ) + if( !checkSysID( systemId ) ) throw new IllegalArgumentException( "can't use both \" and ' in systemLiteral" ); /** returns " SYSTEM "+systemLiteral */ final override def toString() = - Utility.systemLiteralToString( systemLiteral ); + Utility.systemLiteralToString( systemId ); final def toString(sb: StringBuffer): StringBuffer = - Utility.systemLiteralToString( sb, systemLiteral ); + Utility.systemLiteralToString( sb, systemId ); } @@ -53,18 +55,18 @@ case class SystemID( systemLiteral:String ) extends ExternalID with parsing.Toke * @param publicLiteral the public identifier literal * @param systemLiteral (can be null for notation pubIDs) the system identifier literal **/ -case class PublicID( publicLiteral:String, systemLiteral:String ) extends ExternalID with parsing.TokenTests{ +case class PublicID( publicId:String, systemId:String ) extends ExternalID with parsing.TokenTests{ //Console.println("constructing PublicID \""+publicLiteral+"\" "+systemLiteral); //Console.println("util returns "+checkPubID( publicLiteral )); - if( !checkPubID( publicLiteral )) + if( !checkPubID( publicId )) throw new IllegalArgumentException( - "publicLiteral must consist of PubidChars" + "publicId must consist of PubidChars" ); - if( systemLiteral!= null && !checkSysID( systemLiteral ) ) + if( systemId != null && !checkSysID( systemId ) ) throw new IllegalArgumentException( - "can't use both \" and ' in systemLiteral" + "can't use both \" and ' in systemId" ); /** the constant "#PI" */ @@ -76,16 +78,16 @@ case class PublicID( publicLiteral:String, systemLiteral:String ) extends Extern /** always empty */ final def child = Nil; - /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */ + /** returns "PUBLIC "+publicId+" SYSTEM "+systemId */ final override def toString(): String = { toString(new StringBuffer()).toString(); } - /** appends "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral to argument */ + /** appends "PUBLIC "+publicId+" SYSTEM "+systemId to argument */ final def toString(sb: StringBuffer): StringBuffer = { - Utility.publicLiteralToString( sb, publicLiteral ).append(' '); - if(systemLiteral!=null) - Utility.systemLiteralToString( sb, systemLiteral ); + Utility.publicLiteralToString( sb, publicId ).append(' '); + if(systemId!=null) + Utility.systemLiteralToString( sb, systemId ); else sb } diff --git a/sources/scala/xml/parsing/ConstructingParser.scala b/sources/scala/xml/parsing/ConstructingParser.scala index c525a08c54..7cf3bd6226 100644 --- a/sources/scala/xml/parsing/ConstructingParser.scala +++ b/sources/scala/xml/parsing/ConstructingParser.scala @@ -9,7 +9,34 @@ package scala.xml.parsing ; +import scala.io.Source; + object ConstructingParser { + + + def fromFile(inp: java.io.File, preserveWS: Boolean) = { + /* DEBUG + val src = Source.fromFile(inp); + while(src.hasNext) { + Console.print(src.next); + if(!src.hasNext) { + Console.print("last character!"); + Console.print(src.ch); + } + } + */ + val p = new ConstructingParser(Source.fromFile(inp), preserveWS); + /* + { + override def externalSource(systemLiteral: String): Source = { + Source.fromFile(new java.io.File(inp.getParent(), systemLiteral)); + } + } + */ + p.nextch; + p + } + def fromSource(inp: scala.io.Source, preserveWS: Boolean) = { val p = new ConstructingParser(inp, preserveWS); p.nextch; @@ -19,12 +46,22 @@ object ConstructingParser { /** an xml parser. parses XML and invokes callback methods of a MarkupHandler */ -class ConstructingParser(inp: scala.io.Source, presWS:Boolean) +class ConstructingParser(inp: Source, presWS:Boolean) extends ConstructingHandler with MarkupParser { + override val isValidating = true; val preserveWS = presWS; val input = inp; val handle = this; + override def externalSource(systemLiteral: String): Source = { + var fileStr = inp.descr; + if(inp.descr.startsWith("file:")) { + fileStr = inp.descr.substring(5, inp.descr.length()); + } + fileStr = fileStr.substring(0,fileStr.lastIndexOf(java.io.File.separator)+1); + + Source.fromFile(fileStr + systemLiteral); + } } diff --git a/sources/scala/xml/parsing/MarkupHandler.scala b/sources/scala/xml/parsing/MarkupHandler.scala index 60b7027ee3..63fb090cc3 100644 --- a/sources/scala/xml/parsing/MarkupHandler.scala +++ b/sources/scala/xml/parsing/MarkupHandler.scala @@ -9,15 +9,22 @@ package scala.xml.parsing; +import scala.io.Source; +import scala.collection.mutable.{ HashMap, Map } import scala.xml.dtd._ ; +import scala.util.logging._; + /** class that handles markup - provides callback methods to MarkupParser. * the default is nonvalidating behaviour * * @todo can we ignore more entity declarations (i.e. those with extIDs)? * @todo expanding entity references */ -abstract class MarkupHandler { +abstract class MarkupHandler with Logged with ConsoleLogger { + + // impl. of Logged + //def log(msg:String) = {} /** returns true is this markup handler is validing */ val isValidating: Boolean = false; @@ -27,6 +34,21 @@ abstract class MarkupHandler { var decls: List[scala.xml.dtd.Decl] = Nil; + var ent: Map[String, EntityDecl] = new HashMap[String, EntityDecl](); + + def replacementText( entityName: String ): Source = { + ent.get(entityName) match { + case Some(ParsedEntityDecl(_, IntDef(value))) => + Source.fromString(value); + case Some(ParameterEntityDecl(_, IntDef(value))) => + Source.fromString(" "+value+" "); + case Some(_) => + Source.fromString("<!-- "+entityName+"; -->"); + case None => + Source.fromString("<!-- unknown entity "+entityName+"; -->") + } + } + /** callback method invoked by MarkupParser after parsing an element. * * @param pos the position in the sourcefile @@ -60,18 +82,26 @@ abstract class MarkupHandler { def attListDecl(name: String, attList: List[AttrDecl]): Unit = {} - def parameterEntityDecl(name: String, edef: EntityDef): Unit = edef match { - case _:ExtDef if !isValidating => - ; // ignore (cf REC-xml 4.4.1) - case _ => - decls = ParameterEntityDecl(name, edef) :: decls; + def parameterEntityDecl(name: String, edef: EntityDef): Unit = { + //log("parameterEntityDecl("+name+","+edef+")"); + edef match { + case _:ExtDef if !isValidating => + ; // ignore (cf REC-xml 4.4.1) + case _ => + val y = ParameterEntityDecl(name, edef); + decls = y :: decls; + ent.update(name, y); + //log("ent.get(..) = "+ent.get(name)); + } } def parsedEntityDecl(name: String, edef: EntityDef): Unit = edef match { case _:ExtDef if !isValidating => ; // ignore (cf REC-xml 4.8 and 4.4.1) case _ => - decls = ParsedEntityDecl(name, edef) :: decls; + val y = ParsedEntityDecl(name, edef); + decls = y :: decls; + ent.update(name, y) } def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = diff --git a/sources/scala/xml/parsing/MarkupParser.scala b/sources/scala/xml/parsing/MarkupParser.scala index 5c77d39bee..53b9e9e6a4 100644 --- a/sources/scala/xml/parsing/MarkupParser.scala +++ b/sources/scala/xml/parsing/MarkupParser.scala @@ -19,11 +19,16 @@ import scala.xml.dtd._ ; * and returns whatever the markup handler returns. Use * <code>ConstructingParser</code> if you just want to parse XML to * construct instances of <code>scala.xml.Node</code>. + * + * While XML elements are returned, DTD declarations - if handled - are + * collected using side-effects. */ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef with TokenTests { val input: Source; + def externalSource(systemLiteral: String): Source; + // // variables, values // @@ -39,6 +44,12 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi /** holds the position in the source file */ var pos: Int = _; + /* true if reading external sources */ + var isReadingExternal = false;; + + /* true if reading external subset */ + var inExtSubSet = false; + /** holds temporary values of pos */ var tmppos: Int = _; @@ -56,6 +67,19 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi // methods // + /** <? prolog ::= xml S ... ?> + */ + def xmlProcInstr(): MetaData = { + xToken("xml"); + xSpace; + val Pair(md,scp) = xAttributes(TopScope); + if(scp != TopScope) + reportSyntaxError("no xmlns definitions here, please."); + xToken('?'); + xToken('>'); + md + } + /** <? prolog ::= xml S */ def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = { @@ -66,51 +90,75 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi var info_enc: Option[String] = None; var info_stdl: Option[Boolean] = None; - xToken("xml"); - xSpace; - val Pair(md,scp) = xAttributes(TopScope); - xToken('?'); - xToken('>'); + var m = xmlProcInstr(); + xSpace; - if(TopScope == scp) { - var m = md; - - if (!m.isPrefixed && m.key == "version") { - if (m.value == "1.0") { - info_ver = Some("1.0"); - m = m.next; - } else { - reportSyntaxError("cannot deal with versions != 1.0"); - } - } else - reportSyntaxError("VersionInfo expected!"); - - if (!m.isPrefixed && m.key == "encoding") { - val enc = m.value; - if (!isValidIANAEncoding(enc)) - reportSyntaxError("\"" + enc + "\" is not a valid encoding"); - info_enc = Some(enc); - m = m.next + + if (!m.isPrefixed && m.key == "version") { + if (m.value == "1.0") { + info_ver = Some("1.0"); + m = m.next; + } else { + reportSyntaxError("cannot deal with versions != 1.0"); } + } else + reportSyntaxError("VersionInfo expected!"); + + if (!m.isPrefixed && m.key == "encoding") { + val enc = m.value; + if (!isValidIANAEncoding(enc)) + reportSyntaxError("\"" + enc + "\" is not a valid encoding"); + info_enc = Some(enc); + m = m.next + } - if (!m.isPrefixed && m.key == "standalone") { - m.value match { - case "yes" => - info_stdl = Some(true); - case "no" => - info_stdl = Some(false); - case _ => - reportSyntaxError("either 'yes' or 'no' expected"); - } - m = m.next + if (!m.isPrefixed && m.key == "standalone") { + m.value match { + case "yes" => + info_stdl = Some(true); + case "no" => + info_stdl = Some(false); + case _ => + reportSyntaxError("either 'yes' or 'no' expected"); } + m = m.next + } + + if (m != Null) + reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!"); + Tuple3(info_ver,info_enc,info_stdl) + } + + /** prolog, but without standalone */ + def textDecl(): Tuple2[Option[String],Option[String]] = { + + var info_ver: Option[String] = None; + var info_enc: Option[String] = None; + + var m = xmlProcInstr(); - if (m != Null) - reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!"); + if (!m.isPrefixed && m.key == "version") { + if (m.value == "1.0") { + info_ver = Some("1.0"); + m = m.next; + } else { + reportSyntaxError("cannot deal with versions != 1.0"); + } } else - reportSyntaxError("no xmlns definitions here, please"); + reportSyntaxError("VersionInfo expected!"); + + if (m != Null && !m.isPrefixed && m.key == "encoding") { + val enc = m.value; + if (!isValidIANAEncoding(enc)) + reportSyntaxError("\"" + enc + "\" is not a valid encoding"); + info_enc = Some(enc); + m = m.next + } - Tuple3(info_ver,info_enc,info_stdl) + if (m != Null) + reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!"); + + Tuple2(info_ver, info_enc); } /** @@ -152,7 +200,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi elemCount = elemCount + 2; case m:Node => elemCount = elemCount + 1; - theNode = m; + theNode = m; } if (1 != elemCount) { reportSyntaxError("document must contain exactly one element"); @@ -177,13 +225,21 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi /** this method assign the next character to ch and advances in input */ def nextch: Unit = { if (curInput.hasNext) { - ch = input.next; - pos = input.pos; + ch = curInput.next; + pos = curInput.pos; + } else { + //Console.println("nextch, curInput.hasNext == false ") ; + //Console.println("nextch, isReadingExternal == "+isReadingExternal); + //Console.println("nextch, Nil != inpStack == "+(Nil!=inpStack)); + if ((!isReadingExternal) && (Nil != inpStack)) { + /** for external source, we like to be notified of eof! */ + pop(); + } else { + eof = true; + ch = 0.asInstanceOf[Char]; + //throw new Exception("this is the end") + } } - else if (Nil != inpStack) - pop(); - else - eof = true; } //final val enableEmbeddedExpressions: Boolean = false; @@ -193,8 +249,10 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi def xToken(that: Char): Unit = { if (ch == that) nextch; - else + else { reportSyntaxError("'" + that + "' expected instead of '" + ch + "'"); + error("FATAL"); + } } def xToken(that: Seq[Char]): Unit = { @@ -274,6 +332,24 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi } + /** entity value, terminated by either ' or ". value may not contain <. + * AttValue ::= `'` { _ } `'` + * | `"` { _ } `"` + */ + def xEntityValue(): String = { + val endch = ch; + nextch; + while (ch != endch) { + putChar(ch); + nextch; + } + nextch; + val str = cbuf.toString(); + cbuf.setLength(0); + str + } + + /** parse a start or empty tag. * [40] STag ::= '<' Name { S Attribute } [S] * [44] EmptyElemTag ::= '<' Name { S Attribute } [S] @@ -382,6 +458,26 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi } } + /** '<' content1 ::= ... */ + def content1(pscope: NamespaceBinding, ts: NodeBuffer): Unit = { + ch match { + case '!' => + nextch; + if ('[' == ch) // CDATA + ts + xCharData; + else if ('D' == ch) // doctypedecl, parse DTD + parseDTD(); + else // comment + ts + xComment; + case '?' => // PI + nextch; + ts + xProcInstr; + case _ => + ts + element1(pscope); // child + } + } + + /** content1 ::= '<' content1 | '&' charref ... */ def content(pscope: NamespaceBinding): NodeSeq = { var ts = new NodeBuffer; var exit = eof; @@ -399,24 +495,10 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi nextch; //Console.println("after ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos); - ch match { - case '/' => - exit = true; // end tag - case '!' => - nextch; - if ('[' == ch) // CDATA - ts + xCharData; - else if ('D' == ch) // doctypedecl, parse DTD - parseDTD(); - else // comment - ts + xComment; - case '?' => // PI - nextch; - ts + xProcInstr; - case _ => - ts + element1(pscope); // child - } - + if('/' ==ch) + exit = true; // end tag + else + content1(pscope, ts) //case '{' => /* if( xCheckEmbeddedBlock ) { ts.appendAll(xEmbeddedExpr); @@ -496,6 +578,44 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi extID = externalID(); xSpace; } + + /* parse external subset of DTD + */ + + if(null != extID) { + val extSubsetSrc = externalSource( extID.systemId ); + + isReadingExternal = true; + inExtSubSet = true; + /* + .indexOf(':') != -1) { // assume URI + Source.fromFile(new java.net.URI(extID.systemLiteral)); + } else { + Source.fromFile(extID.systemLiteral); + } + */ + //Console.println("I'll print it now"); + val old = curInput; + tmppos = curInput.pos; + val oldch = ch; + curInput = extSubsetSrc; + pos = 0; + nextch; + extSubset(); + + isReadingExternal = false; + inExtSubSet = false; + + curInput = old; + pos = curInput.pos; + ch = curInput.ch; + eof = false; + //while(extSubsetSrc.hasNext) + //Console.print(extSubsetSrc.next); + + //Console.println("returned from external, current ch = "+ch ) + } + if ('[' == ch) { // internal subset nextch; /* TODO */ @@ -512,7 +632,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi override var externalID = extID; override val decls = handle.decls.reverse; } - this.dtd.initializeEntities(); + //this.dtd.initializeEntities(); } def element(pscope: NamespaceBinding): NodeSeq = { @@ -556,10 +676,10 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi */ def xName: String = { if (isNameStart(ch)) { - do { + while (isNameChar(ch)) { putChar(ch); nextch; - } while (isNameChar(ch)); + } val n = cbuf.toString().intern(); cbuf.setLength(0); n @@ -574,7 +694,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt } /** skip optional space S? */ - def xSpaceOpt = while (isSpace(ch)) { nextch; }; + def xSpaceOpt = while (isSpace(ch) && !eof) { nextch; }; /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ def xSpace = { @@ -681,53 +801,157 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi // dtd parsing // - def intSubset(): Unit = { - //Console.println("(DEBUG) intSubset()"); - xSpace; - while (']' != ch) { + def extSubset(): Unit = { + var textdecl:Tuple2[Option[String],Option[String]] = null; + if(ch=='<') { + nextch; + if(ch=='?') { + nextch; + textdecl = textDecl() + } else + markupDecl1(); + } + while(curInput.hasNext) { + markupDecl(); + } + } + + def markupDecl1() = { + def doInclude() = { + xToken('['); while(']' != ch) markupDecl(); nextch // ']' + } + def doIgnore() = { + xToken('['); while(']' != ch) nextch; nextch; // ']' + } + if('?' == ch) { + nextch; + xProcInstr; // simply ignore processing instructions! + } else { + xToken('!'); ch match { - case '%' => + case '-' => + xComment ; // ignore comments + + case 'E' => nextch; - handle.peReference(xName); - xToken(';'); - xSpace; - //peReference - case '<' => - nextch; + if ('L' == ch) { + nextch; + elementDecl() + } else + entityDecl(); - if('?' == ch) - xProcInstr; // simply ignore processing instructions! - else { - xToken('!'); - ch match { - case '-' => - xComment ; // ignore comments + case 'A' => + nextch; + attrDecl(); - case 'E' => - nextch; - if ('L' == ch) { - nextch; - elementDecl() - } else - entityDecl(); + case 'N' => + nextch; + notationDecl(); - case 'A' => + case '[' if inExtSubSet => + nextch; + xSpaceOpt; + ch match { + case '%' => nextch; - attrDecl(); - - case 'N' => + val ent = xName; + xToken(';'); + xSpaceOpt; + /* + Console.println("hello, pushing!"); + { + val test = replacementText(ent); + while(test.hasNext) + Console.print(test.next); + } */ + push(ent); + xSpaceOpt; + //Console.println("hello, getting name"); + val stmt = xName; + //Console.println("hello, got name"); + xSpaceOpt; + //Console.println("how can we be eof = "+eof); + + // eof = true because not external?! + if(!eof) + error("expected only INCLUDE or IGNORE"); + + pop(); + + + //Console.println("hello, popped"); + stmt.match { + // parameter entity + case "INCLUDE" => + doInclude(); + case "IGNORE" => + doIgnore() + } + case 'I' => nextch; - notationDecl(); + ch.match { + case 'G' => + nextch; + xToken("NORE"); + xSpaceOpt; + doIgnore() + case 'N' => + nextch; + xToken("NCLUDE"); + doInclude() + } } - } - xSpace; - case _ => - reportSyntaxError("unexpected character '"+ch+"'"); - nextch; + xToken(']'); + xToken('>'); + + case _ => + curInput.reportError(pos, "unexpected character '"+ch+"', expected some markupdecl"); + while(ch!='>') + nextch; + } } } + def markupDecl(): Unit = ch match { + /** parameter entity reference + * n-v: just create PE-reference + * v: "parse replacementText into NodeBuffer ?" + */ + case '%' => + nextch; + val ent = xName; + xToken(';'); + if(!isValidating) + handle.peReference(ent); + else { + //Console.println("pushed entity "+ent); + push(ent); + } + //peReference + case '<' => + nextch; + markupDecl1(); + + case _ if isSpace(ch) => + xSpace; + case _ => + //Console.println("still think am reading external: "+isReadingExternal); + reportSyntaxError("markupdecl: unexpected character '"+ch+"'"); + nextch; + } + + /** "rec-xml/#ExtSubset" pe references may not occur within markup + declarations + */ + def intSubset(): Unit = { + //Console.println("(DEBUG) intSubset()"); + xSpace; + while (']' != ch) { + markupDecl() + } + } + /** <! element := ELEMENT */ def elementDecl(): Unit = { @@ -837,7 +1061,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi } case '"' | '\'' => - val av = xAttributeValue(); + val av = xEntityValue(); xSpaceOpt; xToken('>'); if (isParameterEntity) @@ -887,15 +1111,31 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi } def push(entityName:String) = { + //Console.println("BEFORE PUSHING "+ch); + //Console.println("BEFORE PUSHING "+pos); + //Console.println("PUSHING "+entityName); inpStack = curInput :: inpStack; - curInput = this.dtd.replacementText(entityName); + curInput = replacementText(entityName); nextch; } + /* + def push(src:Source) = { + curInput = src; + nextch; + } + */ def pop() = { + //Console.println("POPPING"); curInput = inpStack.head; inpStack = inpStack.tail; - nextch; + ch = curInput.ch; + pos = curInput.pos; + eof = !curInput.hasNext; + //Console.println("returned (popped), current ch = "+ch ) + //Console.println("POPPING ch now "+ch); + //Console.println("POPPING ch now "+pos); + //nextch; } } diff --git a/sources/scala/xml/parsing/ValidatingMarkupHandler.scala b/sources/scala/xml/parsing/ValidatingMarkupHandler.scala index a0ee7f4479..3de932cd1b 100644 --- a/sources/scala/xml/parsing/ValidatingMarkupHandler.scala +++ b/sources/scala/xml/parsing/ValidatingMarkupHandler.scala @@ -12,14 +12,9 @@ abstract class ValidatingMarkupHandler extends MarkupHandler { final override def attListDecl(name: String, attList: List[AttrDecl]): Unit = decls = AttListDecl( name, attList) :: decls; - final override def parameterEntityDecl(name: String, edef: EntityDef): Unit = - decls = ParameterEntityDecl( name, edef) :: decls; - - final override def parsedEntityDecl(name: String, edef: EntityDef): Unit = - decls = ParsedEntityDecl( name, edef) :: decls; - - final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = - decls = UnparsedEntityDecl( name, extID, notat) :: decls; + final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = { + decls = UnparsedEntityDecl( name, extID, notat) :: decls; + } final override def notationDecl(notat: String, extID: ExternalID): Unit = decls = NotationDecl( notat, extID) :: decls; |